dc.identifier.citation | @inproceedings{Aggarwal2001,
  author    = {Aggarwal, Charu C. and Hinneburg, Alexander and Keim, Daniel A.},
  editor    = {Van den Bussche, Jan and Vianu, Victor},
  title     = {On the Surprising Behavior of Distance Metrics in High Dimensional Space},
  booktitle = {Database Theory --- {ICDT} 2001},
  year      = {2001},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {420--434},
}

% arXiv preprint: the identifier is stored in eprint/eprinttype; CoRR with an
% abs/ volume is the dblp convention, so classic styles still print a venue.
@article{McInnes2018,
  author     = {McInnes, Leland and Healy, John},
  title      = {{UMAP}: Uniform Manifold Approximation and Projection for Dimension Reduction},
  journal    = {CoRR},
  volume     = {abs/1802.03426},
  year       = {2018},
  month      = feb,
  eprint     = {1802.03426},
  eprinttype = {arXiv},
}

% The abstract below is quote-delimited and continues past this line.
@InProceedings{Campello2013,
  author    = "Campello, Ricardo J. G. B. and Moulavi, Davoud and Sander, Joerg",
  editor    = "Pei, Jian and Tseng, Vincent S. and Cao, Longbing and Motoda, Hiroshi and Xu, Guandong",
  title     = "Density-Based Clustering Based on Hierarchical Density Estimates",
  booktitle = "Advances in Knowledge Discovery and Data Mining",
  year      = "2013",
  publisher = "Springer Berlin Heidelberg",
  address   = "Berlin, Heidelberg",
  pages     = "160--172",
  abstract  = "We propose a theoretically and practically improved density-based, hierarchical clustering method, providing a clustering hierarchy from which a simplified tree of significant clusters can be constructed. For obtaining a ``flat'' partition consisting of only the most significant clusters (possibly corresponding to different density thresholds), we propose a novel cluster stability measure, formalize the problem of maximizing the overall stability of selected clusters, and formulate an algorithm that computes an optimal solution to this problem. 
We demonstrate that our approach outperforms the current, state-of-the-art, density-based clustering methods on a wide variety of real world data.",
  isbn      = "978-3-642-37456-2"
}

% Web sources: urldate must be a full ISO 8601 date (zero-padded day).
@online{Grootendorst2020,
  author  = {Grootendorst, Maarten},
  title   = {Topic Modeling with {BERT}},
  year    = 2020,
  url     = {https://towardsdatascience.com/topic-modeling-with-bert-779f7db187e6},
  urldate = {2020-10-01},
}

% arXiv preprint: identifier kept in eprint/eprinttype, venue dblp-style.
@article{Angelov2020,
  author     = {Angelov, Dimitar},
  title      = {{Top2Vec}: Distributed Representations of Topics},
  journal    = {CoRR},
  volume     = {abs/2008.09470},
  year       = {2020},
  eprint     = {2008.09470},
  eprinttype = {arXiv},
}

@online{Borrelli2021,
  author  = {Borrelli, David},
  title   = {Clustering sentence embeddings to identify intents in short text},
  year    = 2021,
  url     = {https://towardsdatascience.com/clustering-sentence-embeddings-to-identify-intents-in-short-text-48d22d3bf02e},
  urldate = {2021-10-19},
}

@article{mueller-1970,
  author    = {Mueller, John E.},
  title     = {Presidential Popularity from {Truman} to {Johnson}},
  journal   = {American Political Science Review},
  volume    = {64},
  number    = {1},
  pages     = {18--34},
  year      = {1970},
  publisher = {Cambridge University Press},
  doi       = {10.2307/1955610},
}

% The ampersand in the journal name must be escaped or LaTeX aborts on it.
@article{hetherington-nelson-2003,
  author    = {Hetherington, Marc J. and Nelson, Michael},
  title     = {Anatomy of a Rally Effect: {George W. Bush} and the {War on Terrorism}},
  journal   = {PS: Political Science \& Politics},
  volume    = {36},
  number    = {1},
  pages     = {37--42},
  year      = {2003},
  publisher = {Cambridge University Press},
  doi       = {10.1017/S1049096503001665},
}

@article{baker,
  author = {William D. Baker and John R. 
Oneal},
  title    = {Patriotism or Opinion Leadership?: The Nature and Origins of the {``Rally 'Round the Flag''} Effect},
  journal  = {Journal of Conflict Resolution},
  volume   = {45},
  number   = {5},
  pages    = {661--687},
  year     = {2001},
  doi      = {10.1177/0022002701045005006},
  url      = {https://doi.org/10.1177/0022002701045005006},
  abstract = { In this study, the “rally effect”—the propensity for the American public to put aside political differences and support the president during international crises—is measured by considering the changes in presidential popularity following all 193 Militarized Interstate Disputes (MIDs) between 1933 and 1992 as identified by the Correlates of War project. Summary analyses find minor, statistically insignificant rallies associated with uses of force, although sizable rallies are associated with particular subcategories of military crises. However, larger rallies are associated with the United States as both revisionist and originator of the dispute, with the initiation of a full interstate war, and with prominent headline placement in the New York Times. Regression analyses indicate that rallies are more likely when they are associated with White House statements and bipartisan support for the administration's policies. Findings suggest that the size and appearance of a rally depends primarily on how the crisis is presented to the public in terms of media coverage, bipartisan support, and White House spin. 
} }

@book{goldstein2008principles,
  author    = {Goldstein, Joshua S. and Pevehouse, Jon C. and Sernau, Scott},
  title     = {Principles of International Relations},
  year      = {2008},
  publisher = {Pearson Longman},
}

% The publisher export stored the DOI resolver link in eprint as well; eprint
% is meant for archive identifiers, so only doi and url are kept.
@article{Hoff2002,
  author    = {Hoff, Peter D. and Raftery, Adrian E. and Handcock, Mark S.},
  title     = {Latent Space Approaches to Social Network Analysis},
  journal   = {Journal of the American Statistical Association},
  volume    = {97},
  number    = {460},
  pages     = {1090--1098},
  year      = {2002},
  publisher = {Taylor \& Francis},
  doi       = {10.1198/016214502388618906},
  url       = {https://doi.org/10.1198/016214502388618906},
  abstract  = { Network models are widely used to represent relational information among interacting units. In studies of social networks, recent emphasis has been placed on random graph models where the nodes usually represent individual social actors and the edges represent the presence of a specified relation between actors. We develop a class of models where the probability of a relation between actors depends on the positions of individuals in an unobserved “social space.” We make inference for the social space within maximum likelihood and Bayesian frameworks, and propose Markov chain Monte Carlo procedures for making inference on latent positions and the effects of observed covariates. We present analyses of three standard datasets from the social networks literature, and compare the method to an alternative stochastic blockmodeling approach. In addition to improving on model fit for these datasets, our method provides a visual and interpretable model-based spatial representation of social relationships and improves on existing methods by allowing the statistical uncertainty in the social space to be quantified and graphically represented. }
}

% The accented letter is written as a braced accent command so classic BibTeX
% sorts and labels the name correctly.
@article{Barbera2015,
  author = {Pablo Barber{\'a} and John T. Jost and Jonathan Nagler and Joshua A. 
Tucker and Richard Bonneau},
  title    = {Tweeting From Left to Right: Is Online Political Communication More Than an Echo Chamber?},
  journal  = {Psychological Science},
  volume   = {26},
  number   = {10},
  pages    = {1531--1542},
  year     = {2015},
  doi      = {10.1177/0956797615594620},
  note     = {PMID: 26297377},
  url      = {https://doi.org/10.1177/0956797615594620},
  abstract = { We estimated ideological preferences of 3.8 million Twitter users and, using a dataset of nearly 150 million tweets concerning 12 political and nonpolitical issues, explored whether online communication resembles an “echo chamber” (as a result of selective exposure and ideological segregation) or a “national conversation.” We observed that information was exchanged primarily among individuals with similar ideological preferences in the case of political issues (e.g., 2012 presidential election, 2013 government shutdown) but not many other current events (e.g., 2013 Boston Marathon bombing, 2014 Super Bowl). Discussion of the Newtown shootings in 2012 reflected a dynamic process, beginning as a national conversation before transforming into a polarized exchange. With respect to both political and nonpolitical issues, liberals were more likely than conservatives to engage in cross-ideological dissemination; this is an important asymmetry with respect to the structure of communication that is consistent with psychological theory and research bearing on ideological differences in epistemic, existential, and relational motivation. Overall, we conclude that previous work may have overestimated the degree of ideological segregation in social-media usage. }
}

% The author list of the next entry continues past this line.
@article{Kursuncu2019,
  author = {Kursuncu, Ugur and Gaur, Manas and Castillo, Carlos and Alambo, Amanuel and Thirunarayan, Krishnaprasad and Shalin, Valerie and Achilov, Dilshod and Arpinar, I. 
Budak and Sheth, Amit},
  title      = {Modeling Islamist Extremist Communications on Social Media Using Contextual Dimensions: Religion, Ideology, and Hate},
  year       = {2019},
  issue_date = {November 2019},
  volume     = {3},
  number     = {CSCW},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  doi        = {10.1145/3359253},
  url        = {https://doi.org/10.1145/3359253},
  abstract   = {Terror attacks have been linked in part to online extremist content. Online conversations are cloaked in religious ambiguity, with deceptive intentions, often twisted from mainstream meaning to serve a malevolent ideology. Although tens of thousands of Islamist extremism supporters consume such content, they are a small fraction relative to peaceful Muslims. The efforts to contain the ever-evolving extremism on social media platforms have remained inadequate and mostly ineffective. Divergent extremist and mainstream contexts challenge machine interpretation, with a particular threat to the precision of classification algorithms. Radicalization is a subtle long-running persuasive process that occurs over time. Our context-aware computational approach to the analysis of extremist content on Twitter breaks down this persuasion process into building blocks that acknowledge inherent ambiguity and sparsity that likely challenge both manual and automated classification. Based on prior empirical and qualitative research in social sciences, particularly political science, we model this process using a combination of three contextual dimensions -- religion, ideology, and hate -- each elucidating a degree of radicalization and highlighting independent features to render them computationally accessible. We utilize domain-specific knowledge resources for each of these contextual dimensions such as Qur'an for religion, the books of extremist ideologues and preachers for political ideology and a social media hate speech corpus for hate. 
The significant sensitivity of the Islamist extremist ideology and its local and global security implications require reliable algorithms for modelling such communications on Twitter. Our study makes three contributions to reliable analysis: (i) Development of a computational approach rooted in the contextual dimensions of religion, ideology, and hate, which reflects strategies employed by online Islamist extremist groups, (ii) An in-depth analysis of relevant tweet datasets with respect to these dimensions to exclude likely mislabeled users, and (iii) A framework for understanding online radicalization as a process to assist counter-programming. Given the potentially significant social impact, we evaluate the performance of our algorithms to minimize mislabeling, where our context-aware approach outperforms a competitive baseline by 10.2\% in precision, thereby enhancing the potential of such tools for use in human review.},
  journal   = {Proceedings of the ACM on Human-Computer Interaction},
  month     = nov,
  articleno = {151},
  numpages  = {22},
  keywords  = {multi-dimensional modeling, islamist extremism, contextual dimensions, user modeling, radicalization}
}

% Venue spelled out; the acronym is kept and brace-protected.
@inproceedings{Arora2017,
  author    = {Arora, Sanjeev and Liang, Yingyu and Ma, Tengyu},
  title     = {A Simple but Tough-to-Beat Baseline for Sentence Embeddings},
  booktitle = {International Conference on Learning Representations ({ICLR})},
  year      = {2017},
}

@inproceedings{gu2021exploiting,
  author    = {Gu, Jie and Wang, Feng and Sun, Qinghui and Ye, Zhiquan and Xu, Xiaoxiao and Chen, Jingmin and Zhang, Jun},
  title     = {Exploiting behavioral consistence for universal user representation},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {35},
  number    = {5},
  pages     = {4063--4071},
  year      = {2021},
}

@inproceedings{amir2016modelling,
  title={Modelling Context with User Embeddings for Sarcasm Detection in Social Media},
  author={Amir, Silvio and Wallace, Byron C and Lyu, Hao and Carvalho, Paula and Silva, Mario J},
  booktitle={Proceedings of The 20th SIGNLL Conference on Computational Natural Language 
Learning},
  pages={167--177},
  year={2016}
}

@inproceedings{preoctiuc2017beyond,
  author    = {Preo{\c{t}}iuc-Pietro, Daniel and Liu, Ye and Hopkins, Daniel and Ungar, Lyle},
  title     = {Beyond binary labels: political ideology prediction of {Twitter} users},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  pages     = {729--740},
  year      = {2017},
}

@inproceedings{ding2017multi,
  author    = {Ding, Tao and Bickel, Warren K and Pan, Shimei},
  title     = {Multi-view unsupervised user feature embedding for social media-based substance use prediction},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  pages     = {2275--2284},
  year      = {2017},
}

% Conference paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{pan2019social,
  author    = {Pan, Shimei and Ding, Tao},
  title     = {Social media-based user embedding: A literature review},
  booktitle = {Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence ({IJCAI-19})},
  year      = {2019},
}

@inproceedings{garimella2017,
  author    = {Garimella, Venkata Rama Kiran and Weber, Ingmar},
  title     = {A long-term analysis of polarization on {Twitter}},
  booktitle = {Eleventh International {AAAI} Conference on Web and Social Media},
  year      = {2017},
}

% The abstract of the next entry continues past this line.
@article{Esteban1994,
  issn     = {0012-9682, 1468-0262},
  url      = {http://www.jstor.org/stable/2951734},
  abstract = {Suppose that a population of individuals may be grouped according to some vector of characteristics into "clusters," such that each cluster is very "similar" in terms of the attributes of its members, but different clusters have members with very "dissimilar" attributes. In that case we say that the society is polarized. Our purpose is to study polarization, and to provide a theory of its measurement. Our contention is that polarization, as conceptualized here, is closely related to the generation of social tensions, to the possibilities of revolution and revolt, and to the existence of social unrest in general. 
We take special care to distinguish our theory from the theory of inequality measurement. We derive measures of polarization that are easily applicable to distributions of characteristics such as income and wealth.},
  author    = {Esteban, Joan-Mar{\'\i}a and Ray, Debraj},
  journal   = {Econometrica},
  number    = {4},
  pages     = {819--851},
  publisher = {Wiley, Econometric Society},
  title     = {On the Measurement of Polarization},
  urldate   = {2022-08-09},
  volume    = {62},
  year      = {1994}
}

% arXiv preprint: identifier kept in eprint/eprinttype, venue dblp-style.
@article{karami2022estimate,
  author     = {Karami, Mansooreh and Mosallanezhad, Ahmadreza and Sheth, Paras and Liu, Huan},
  title      = {Estimating Topic Exposure for Under-Represented Users on Social Media},
  journal    = {CoRR},
  volume     = {abs/2208.03796},
  year       = {2022},
  eprint     = {2208.03796},
  eprinttype = {arXiv},
}

% Last author's given name corrected to the spelling this file already uses
% for the same person in amir2016modelling.
@inproceedings{amir2017quantifying,
  author       = {Amir, Silvio and Coppersmith, Glen and Carvalho, Paula and Silva, Mario J and Wallace, Byron C},
  title        = {Quantifying mental health from social media with neural user embeddings},
  booktitle    = {Machine Learning for Healthcare Conference},
  pages        = {306--321},
  year         = {2017},
  organization = {PMLR},
}

@inproceedings{karami2021profiling,
  author    = {Karami, Mansooreh and Nazer, Tahora H and Liu, Huan},
  title     = {Profiling Fake News Spreaders on Social Media through Psychological and Motivational Factors},
  booktitle = {Proceedings of the 32nd {ACM} Conference on Hypertext and Social Media},
  pages     = {225--230},
  year      = {2021},
}

% No trailing period in the title: the style supplies the punctuation.
@inproceedings{zhang2018anrl,
  author    = {Zhang, Zhen and Yang, Hongxia and Bu, Jiajun and Zhou, Sheng and Yu, Pinggang and Zhang, Jianwei and Ester, Martin and Wang, Can},
  title     = {{ANRL}: attributed network representation learning via deep neural networks},
  booktitle = {{IJCAI}},
  volume    = {18},
  pages     = {3155--3161},
  year      = {2018},
}

@inproceedings{wang2017community,
  author    = {Wang, Xiao and Cui, Peng and Wang, Jing and Pei, Jian and Zhu, Wenwu and Yang, Shiqiang},
  title     = {Community preserving network embedding},
  booktitle = {Thirty-First {AAAI} Conference on Artificial Intelligence},
  year      = {2017},
}

@inproceedings{ding2018predicting,
title        = {Predicting delay discounting from social media likes with unsupervised feature learning},
  author       = {Ding, Tao and Bickel, Warren K and Pan, Shimei},
  booktitle    = {2018 {IEEE/ACM} International Conference on Advances in Social Networks Analysis and Mining ({ASONAM})},
  pages        = {254--257},
  year         = {2018},
  organization = {IEEE}
}

% Publisher name and city split into their own fields.
@article{jiang2021social,
  author    = {Jiang, Julie and Ren, Xiang and Ferrara, Emilio and others},
  title     = {Social media polarization and echo chambers in the context of {COVID-19}: Case study},
  journal   = {{JMIRx} Med},
  volume    = {2},
  number    = {3},
  pages     = {e29570},
  year      = {2021},
  publisher = {JMIR Publications Inc.},
  address   = {Toronto, Canada},
}

@article{Muller2020,
  author     = {Martin M{\"{u}}ller and Marcel Salath{\'{e}}},
  title      = {Addressing machine learning concept drift reveals declining vaccine sentiment during the {COVID-19} pandemic},
  journal    = {CoRR},
  volume     = {abs/2012.02197},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.02197},
  eprinttype = {arXiv},
  eprint     = {2012.02197},
  timestamp  = {Wed, 09 Dec 2020 15:29:05 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-02197.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

@online{Coleman2020,
  author  = {Coleman, Ben},
  title   = {Why is it Okay to Average Embeddings?},
  year    = {2020},
  url     = {https://randorithms.com/2020/11/17/Adding-Embeddings.html},
  urldate = {2020-11-17},
}

% arXiv preprint: identifier kept in eprint/eprinttype, venue dblp-style.
@article{jiang2021mechanisms,
  author     = {Jiang, Bohan and Karami, Mansooreh and Cheng, Lu and Black, Tyler and Liu, Huan},
  title      = {Mechanisms and Attributes of Echo Chambers in Social Media},
  journal    = {CoRR},
  volume     = {abs/2106.05401},
  year       = {2021},
  eprint     = {2106.05401},
  eprinttype = {arXiv},
}

% Corporate author: the extra braces stop BibTeX from splitting the name into
% given names "Pew Research" and surname "Center".
@online{Pew2020,
  author  = {{Pew Research Center}},
  title   = {Differences in How {Democrats} and {Republicans} Behave on {Twitter}},
  year    = {2020},
  url     = {https://www.pewresearch.org/politics/2020/10/15/differences-in-how-democrats-and-republicans-behave-on-twitter/},
  urldate = {2020-10-15},
}

@article{moraffah2020causal,
  title={Causal interpretability for machine learning-problems, methods and 
evaluation},
  author    = {Moraffah, Raha and Karami, Mansooreh and Guo, Ruocheng and Raglin, Adrienne and Liu, Huan},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {22},
  number    = {1},
  pages     = {18--33},
  year      = {2020},
  publisher = {ACM},
  address   = {New York, NY, USA}
}

% Faisal Begins...

@inproceedings{boyd2010tweet,
  author       = {Boyd, Danah and Golder, Scott and Lotan, Gilad},
  title        = {Tweet, tweet, retweet: Conversational aspects of retweeting on {Twitter}},
  booktitle    = {2010 43rd {Hawaii} International Conference on System Sciences},
  pages        = {1--10},
  year         = {2010},
  organization = {IEEE},
}

@article{blondel2008fast,
  author    = {Blondel, Vincent D and Guillaume, Jean-Loup and Lambiotte, Renaud and Lefebvre, Etienne},
  title     = {Fast unfolding of communities in large networks},
  journal   = {Journal of Statistical Mechanics: Theory and Experiment},
  volume    = {2008},
  number    = {10},
  pages     = {P10008},
  year      = {2008},
  publisher = {IOP Publishing},
}

% The issue number is carried by "number" alone; a second copy in "issue"
% would be printed again by biblatex styles.
@article{Morini2021Standard,
  author    = {Morini, Virginia and Pollacci, Laura and Rossetti, Giulio},
  title     = {Toward a Standard Approach for Echo Chamber Detection: {Reddit} Case Study},
  year      = {2021},
  date      = {2021-01},
  journal   = {Applied Sciences},
  volume    = {11},
  number    = {12},
  pages     = {5390},
  publisher = {Multidisciplinary Digital Publishing Institute},
  issn      = {2076-3417},
  doi       = {10.3390/app11125390},
}

% Accented letters are written as braced accent commands so classic BibTeX
% treats each one as a single letter when sorting and building labels.
@article{Koc2018Triadic,
  author  = {Ko{\c{c}}, Sefa {\c{S}}ahin and {\"O}zer, Mert and Toroslu, {\.I}smail Hakk{\i} and Davulcu, Hasan and Jordan, Jeremy},
  title   = {Triadic Co-Clustering of Users, Issues and Sentiments in Political Tweets},
  year    = {2018},
  journal = {Expert Systems with Applications},
  volume  = {100},
  pages   = {79--94},
  issn    = {0957-4174},
  doi     = {10.1016/j.eswa.2018.01.043},
}

@article{Cinelli2021Echo,
  title   = {The Echo Chamber Effect on Social Media},
  author  = {Cinelli, Matteo and De Francisci Morales, Gianmarco and Galeazzi, Alessandro and Quattrociocchi, Walter and Starnini, Michele},
  year    = {2021},
  date    = {2021-03-02},
  journal = {Proceedings of the 
National Academy of Sciences},
  volume    = {118},
  number    = {9},
  pages     = {e2023301118},
  publisher = {Proceedings of the National Academy of Sciences},
  doi       = {10.1073/pnas.2023301118}
}

@article{Colleoni2014Echo,
  author  = {Colleoni, Elanor and Rozza, Alessandro and Arvidsson, Adam},
  title   = {Echo Chamber or Public Sphere? Predicting Political Orientation and Measuring Political Homophily in {Twitter} Using Big Data},
  year    = {2014},
  date    = {2014-04-01},
  journal = {Journal of Communication},
  volume  = {64},
  number  = {2},
  pages   = {317--332},
  issn    = {0021-9916},
  doi     = {10.1111/jcom.12084},
}

% The accented i is written as a braced accent over a dotless i.
@article{Schmidt2017Anatomya,
  author    = {Schmidt, Ana Luc{\'\i}a and Zollo, Fabiana and Del Vicario, Michela and Bessi, Alessandro and Scala, Antonio and Caldarelli, Guido and Stanley, H. Eugene and Quattrociocchi, Walter},
  title     = {Anatomy of News Consumption on {Facebook}},
  year      = {2017},
  date      = {2017-03-21},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {114},
  number    = {12},
  pages     = {3035--3039},
  publisher = {Proceedings of the National Academy of Sciences},
  doi       = {10.1073/pnas.1617052114},
}

@article{Bakshy2015Exposurea,
  author    = {Bakshy, Eytan and Messing, Solomon and Adamic, Lada A.},
  title     = {Exposure to Ideologically Diverse News and Opinion on {Facebook}},
  year      = {2015},
  date      = {2015-06-05},
  journal   = {Science},
  volume    = {348},
  number    = {6239},
  pages     = {1130--1132},
  publisher = {American Association for the Advancement of Science},
  doi       = {10.1126/science.aaa1160},
}

% Publisher name and city split into their own fields.
@article{nickerson1998confirmation,
  author    = {Nickerson, Raymond S},
  title     = {Confirmation bias: A ubiquitous phenomenon in many guises},
  journal   = {Review of General Psychology},
  volume    = {2},
  number    = {2},
  pages     = {175--220},
  year      = {1998},
  publisher = {SAGE Publications},
  address   = {Los Angeles, CA},
}

% A monograph with a publisher and no journal, so it is typed as a book.
@book{klapper1960effects,
  author    = {Klapper, Joseph T},
  title     = {The Effects of Mass Communication},
  year      = {1960},
  publisher = {Free Press},
}

@article{DelVicario2016Spreading,
  title = {The Spreading of Misinformation Online},
author    = {Del Vicario, Michela and Bessi, Alessandro and Zollo, Fabiana and Petroni, Fabio and Scala, Antonio and Caldarelli, Guido and Stanley, H. Eugene and Quattrociocchi, Walter},
  year      = {2016},
  date      = {2016-01-19},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {113},
  number    = {3},
  pages     = {554--559},
  publisher = {Proceedings of the National Academy of Sciences},
  doi       = {10.1073/pnas.1517441113}
}

% First author's surname is "Del Vicario", matching DelVicario2016Spreading;
% the exported form filed her under "Vicario" with "Del" as a given name.
@article{Vicario2019Polarization,
  author  = {Del Vicario, Michela and Quattrociocchi, Walter and Scala, Antonio and Zollo, Fabiana},
  title   = {Polarization and Fake News: Early Warning of Potential Misinformation Targets},
  year    = {2019},
  date    = {2019-03-27},
  journal = {ACM Transactions on the Web},
  volume  = {13},
  number  = {2},
  pages   = {10:1--10:22},
  issn    = {1559-1131},
  doi     = {10.1145/3316809},
}

% Publisher name and city split into their own fields.
@article{shu2017fake,
  author    = {Shu, Kai and Sliva, Amy and Wang, Suhang and Tang, Jiliang and Liu, Huan},
  title     = {Fake news detection on social media: A data mining perspective},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {19},
  number    = {1},
  pages     = {22--36},
  year      = {2017},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

@article{shu2020combating,
  author    = {Shu, Kai and Bhattacharjee, Amrita and Alatawi, Faisal and Nazer, Tahora H and Ding, Kaize and Karami, Mansooreh and Liu, Huan},
  title     = {Combating disinformation in a social media age},
  journal   = {Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery},
  volume    = {10},
  number    = {6},
  pages     = {e1385},
  year      = {2020},
  publisher = {Wiley Online Library},
}

@inproceedings{Calderon2019ContentBased,
  title     = {Content-Based Echo Chamber Detection on Social Media Platforms},
  booktitle = {2019 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)},
  author    = {Calder{\'o}n, Fernando H. 
and Cheng, Li-Kai and Lin, Ming-Jen and Huang, Yen-Hao and Chen, Yi-Shin},
  year  = {2019},
  date  = {2019-08},
  pages = {597--600},
  issn  = {2473-991X},
  doi   = {10.1145/3341161.3343689}
}

@article{Villa2021Echo,
  author  = {Villa, Giacomo and Pasi, Gabriella and Viviani, Marco},
  title   = {Echo Chamber Detection and Analysis},
  year    = {2021},
  date    = {2021-08-21},
  journal = {Social Network Analysis and Mining},
  volume  = {11},
  number  = {1},
  pages   = {78},
  issn    = {1869-5469},
  doi     = {10.1007/s13278-021-00779-3},
}

@inproceedings{conover2011predicting,
  author       = {Conover, Michael D and Gon{\c{c}}alves, Bruno and Ratkiewicz, Jacob and Flammini, Alessandro and Menczer, Filippo},
  title        = {Predicting the political alignment of {Twitter} users},
  booktitle    = {2011 {IEEE} Third International Conference on Privacy, Security, Risk and Trust and 2011 {IEEE} Third International Conference on Social Computing},
  pages        = {192--199},
  year         = {2011},
  organization = {IEEE},
}

% Second author's surname is the compound "De Francisci Morales", as already
% written in Cinelli2021Echo.
@article{garimella2018quantifying,
  author    = {Garimella, Kiran and De Francisci Morales, Gianmarco and Gionis, Aristides and Mathioudakis, Michael},
  title     = {Quantifying controversy on social media},
  journal   = {ACM Transactions on Social Computing},
  volume    = {1},
  number    = {1},
  pages     = {1--27},
  year      = {2018},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% Only the product name is brace-protected; bracing the whole title would
% switch off the style's casing altogether.
@software{Edler-The-MapEquation-software-2022,
  author  = {Edler, Daniel and Eriksson, Anton and Rosvall, Martin},
  title   = {The {MapEquation} software package},
  month   = aug,
  year    = {2022},
  version = {2.6.0},
  url     = {https://mapequation.org},
}

% Second author's surname is the hyphenated compound "Garcia-Gasulla".
@inproceedings{pares2017fluid,
  title={Fluid communities: A competitive, scalable and diverse community detection algorithm},
  author={Par{\'e}s, Ferran and Garcia-Gasulla, Dario and Vilalta, Armand and Moreno, Jonatan and Ayguad{\'e}, Eduard and Labarta, Jes{\'u}s and Cort{\'e}s, Ulises and Suzumura, Toyotaro},
  booktitle={International conference on complex networks and their 
applications},
  pages={229--240},
  year={2017},
  organization={Springer}
}

% Technical report: the issuing department goes in "institution"; the report
% label is supplied by the entry type instead of a fake journal name.
@techreport{karypis1997metis,
  author      = {Karypis, George},
  title       = {{METIS}: Unstructured graph partitioning and sparse matrix ordering system},
  institution = {Department of Computer Science, University of Minnesota},
  year        = {1997},
}

% Faisal Ends...

@online{DataReportal2022countries,
  author  = {{DataReportal}},
  title   = {Which countries have the most {Twitter} users in 2022?},
  year    = {2022},
  url     = {https://datareportal.com/essential-twitter-stats},
  urldate = {2022-01-01},
}

% Second author's surname is the compound "De Francisci Morales", as already
% written in Cinelli2021Echo.
@inproceedings{cossard2020falling,
  author    = {Cossard, Alessandro and De Francisci Morales, Gianmarco and Kalimeri, Kyriaki and Mejova, Yelena and Paolotti, Daniela and Starnini, Michele},
  title     = {Falling into the echo chamber: the {Italian} vaccination debate on {Twitter}},
  booktitle = {Proceedings of the International {AAAI} Conference on Web and Social Media},
  volume    = {14},
  pages     = {130--140},
  year      = {2020},
}

% NOTE(review): Ghojogh2021UMAP is typed as an article but the export carries
% no journal; this looks like a preprint - add the venue or archive identifier
% once confirmed.
@article{Ghojogh2021UMAP,
  author = {Ghojogh, Benyamin and Ghodsi, Ali and Karray, Fakhri and Crowley, Mark},
  title  = {Uniform Manifold Approximation and Projection ({UMAP}) and its Variants: Tutorial and Survey},
  year   = {2021},
  month  = aug,
}

% Conference paper: the proceedings title belongs in booktitle, not journal.
@inproceedings{Garimella2018partisan,
  author    = {Garimella, Kiran and De Francisci Morales, Gianmarco and Gionis, Aristides and Mathioudakis, Michael},
  title     = {Political Discourse on Social Media: Echo Chambers, Gatekeepers, and the Price of Bipartisanship},
  booktitle = {{WWW} '18: Proceedings of the 2018 World Wide Web Conference},
  year      = {2018},
  month     = apr,
  pages     = {913--922},
  isbn      = {978-1-4503-5639-8},
  doi       = {10.1145/3178876.3186139},
}

% The url value of the next entry continues past this line.
@article{Kou2017cscw,
  author     = {Kou, Yubo and Kow, Yong Ming and Gui, Xinning and Cheng, Waikuen},
  title      = {One Social Movement, Two Social Media Sites: A Comparative Study of Public Discourses},
  year       = {2017},
  issue_date = {December 2017},
  publisher  = {Kluwer Academic Publishers},
  address    = {USA},
  volume     = {26},
  number     = {4--6},
  issn       = {0925-9724},
  url        = 
{https://doi.org/10.1007/s10606-017-9284-y},
  doi      = {10.1007/s10606-017-9284-y},
  abstract = {Social media have become central places where public discourses are generated, sustained, and circulated around public events. So far, much research has examined large-scale dissemination patterns of prominent statements, opinions, and slogans circulated on social media, such as the analysis of keywords and hashtags on Twitter regarding a political event. However, little attention has been paid to understanding how local socio-cultural-political conditions influence the formation and development of public discourses on social media. To explore this question, we analyzed public discourses about Hong Kong's Umbrella Movement on two distinct social media sites, Facebook and Weibo, the largest micro-blogging service in China. Facebook topped Hong Kong citizens' usage of social media sites, while Weibo's primary user base is mainland Chinese. The social movement and these two social media sites provide a unique opportunity to explore the commonalities and differences between social media discourses generated by two different cultures. Using grounded theory and discourse analysis, we reveal how people on two sites reasoned about the many incidents of the movement and developed sometimes similar but other times strikingly different discourses. We trace the links between different discourses and the socio-cultural-political conditions of Hong Kong and mainland China. We discuss how this study may contribute deeper understandings of public discourses on social media to the CSCW literature.},
  journal  = {Computer Supported Cooperative Work},
  month    = dec,
  pages    = {807--836},
  numpages = {30},
  keywords = {Synchronicity, Localness of social media discourse, Facebook, Weibo, Social media, Hong Kong, Discourse analysis, Umbrella movement, Public discourse, China}
}

% The title of the next entry continues past this line.
@inproceedings{Liao2014cscw,
  author = {Liao, Q. Vera and Fu, Wai-Tat},
  title  = {Can You Hear Me Now? 
Mitigating the Echo Chamber Effect by Source Position Indicators},
  year      = {2014},
  isbn      = {9781450325400},
  doi       = {10.1145/2531602.2531711},
  url       = {https://doi.org/10.1145/2531602.2531711},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  abstract  = {We examined how a source position indicator showing both valences (pro/con) and magnitudes (moderate/extreme) of positions on controversial topics influenced users' selection and reception of diverse opinions in online discussions. Results showed that the indicator had differential impact on participants who had varied levels of accuracy motives -- i.e., motivation to accurately learn about the topic, by leading to greater exposure to attitude-challenging information for participants with higher accuracy motives. Further analysis revealed that it was mainly caused by the fact that the presence of position indicator increased the selection of moderately inconsistent sources for participants with high accuracy motives but decreased the selection of them for participants with low accuracy motives. The indicator also helped participants differentiate between sources with moderate and extreme positions, and increased their tendency to agree with attitude-challenging information from sources with moderately inconsistent positions. Participants with high accuracy motives were also found to learn significantly more about the arguments put forward by the opposite side with the help of the position indicator. 
We discussed the implications of the results for the nature of the echo chamber effect, as well as for designing information systems that encourage seeking of diverse information and common ground seeking.},
  booktitle = {Proceedings of the 17th ACM Conference on Computer Supported Cooperative Work \& Social Computing},
  pages     = {184--196},
  numpages  = {13},
  keywords  = {selective exposure, motivation, information diversity},
  location  = {Baltimore, Maryland, USA},
  series    = {CSCW '14}
}

% ACM Digital Library export; location (conference city) and series are left
% as exported. The abstract continues past this line.
@inproceedings{Semaan2014cscw,
  author    = {Semaan, Bryan C. and Robertson, Scott P. and Douglas, Sara and Maruyama, Misa},
  title     = {Social Media Supporting Political Deliberation across Multiple Public Spheres: Towards Depolarization},
  year      = {2014},
  isbn      = {9781450325400},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/2531602.2531605},
  doi       = {10.1145/2531602.2531605},
  abstract  = {This paper reports on a qualitative study of social media use for political deliberation by 21 U.S. citizens. In observing people's interactions in the "sprawling public sphere" across multiple social media tools in both political and non-political spaces, we found that social media supported the interactional dimensions of deliberative democracy--the interaction with media and the interaction between people. People used multiple tools through which they: were serendipitously exposed to diverse political information, constructed diverse information feeds, disseminated diverse information, and engaged in respectful and reasoned political discussions with diverse audiences. When people's civic agency was inhibited when using a tool, they often adopted, or switched to, alternative media that could afford what they were trying to achieve. Contrary to the polarization perspective, we find that people were purposefully seeking diverse information and discussants. 
Some individuals altered their views as a result of the interactions they were having in the online public sphere.}, booktitle = {Proceedings of the 17th ACM Conference on Computer Supported Cooperative Work \& Social Computing}, pages = {1409–1421}, numpages = {13}, keywords = {public sphere, multi-mediation, depolarization, social media}, location = {Baltimore, Maryland, USA}, series = {CSCW '14} } @inproceedings{Borge2015cscw, author = {Borge-Holthoefer, Javier and Magdy, Walid and Darwish, Kareem and Weber, Ingmar}, title = {Content and Network Dynamics Behind Egyptian Political Polarization on Twitter}, year = {2015}, isbn = {9781450329224}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/2675133.2675163}, doi = {10.1145/2675133.2675163}, abstract = {There is little doubt about whether social networks play a role in modern protests. This agreement has triggered an entire research avenue, in which social structure and content analysis have been central - but are typically exploited separately. Here, we combine these two approaches to shed light on the opinion evolution dynamics in Egypt during the summer of 2013 along two axes (Islamist/Secularist, pro/anti-military intervention). We intend to find traces of opinion changes in Egypt's population, paralleling those in the international community - which oscillated from sympathetic to condemnatory as civil clashes grew. We find little evidence of people "switching" sides but observe clear changes in volume with both pro- and anti-military camps becoming more active at different stages. Our work contributes new insights into the dynamics of large protest movements, specially in the aftermath of the main events - rather unattended previously. It questions the standard narrative concerning a simplistic mapping between Secularist/pro-military and Islamist/anti-military. 
Finally, our conclusions provide empirical validation to sociological models regarding the behavior of individuals in conflictive contexts.}, booktitle = {Proceedings of the 18th ACM Conference on Computer Supported Cooperative Work \& Social Computing}, pages = {700–711}, numpages = {12}, keywords = {egypt, polarization, mobilization, twitter, opinion switch}, location = {Vancouver, BC, Canada}, series = {CSCW '15} } @inproceedings{Bruns2019interchangably, title={It’s not the technology, stupid: How the ‘Echo Chamber’ and ‘Filter Bubble’ metaphors have failed us}, author={Axel Bruns}, year={2019} } @techreport{Ross2022interchangably, edition = {}, number = {}, journal = {}, pages = {}, publisher = {Reuters Institute for the Study of Journalism}, school = {}, title = {Echo chambers, filter bubbles, and polarisation: a literature review }, volume = {}, author = {Ross Arguedas, A and Robertson, C and Fletcher, R and Nielsen, R}, editor = {}, year = {2022}, series = {} } @incollection{Bruns2021interchangably, title={Echo chambers? Filter bubbles? The misleading metaphors that obscure the real problem}, author={Bruns, Axel}, booktitle={Hate speech and polarization in participatory society}, pages={33--48}, year={2021}, publisher={Routledge} } @article{garimella2018polarization, title={Polarization on social media}, author={Garimella, Kiran and others}, year={2018}, publisher={Aalto University} } @misc{gallup2013demvsrep, author = "Newport, Frank", title = "Democrats Racially Diverse; Republicans Mostly White", howpublished = "Online post", month = "February", year = "2013", note = "Accessed on July 5th, 2023", url = "https://news.gallup.com/poll/160373/democrats-racially-diverse-republicans-mostly-white.aspx", } @Article{sun2022homogeneity, AUTHOR = {Sun, Mingfei and Ma, Xiaoyue and Huo, Yudi}, TITLE = {Does Social Media Users' Interaction Influence the Formation of Echo Chambers? 
Social Network Analysis Based on Vaccine Video Comments on YouTube}, JOURNAL = {International Journal of Environmental Research and Public Health}, VOLUME = {19}, YEAR = {2022}, NUMBER = {23}, ARTICLE-NUMBER = {15869}, URL = {https://www.mdpi.com/1660-4601/19/23/15869}, PubMedID = {36497977}, ISSN = {1660-4601}, ABSTRACT = {The characteristics and influence of the echo chamber effect (TECE) of health misinformation diffusion on social media have been investigated by researchers, but the formation mechanism of TECE needs to be explored specifically and deeply. This research focuses on the influence of users' limitation, intergroup interaction, and reciprocity behavior on TECE based on the social contagion mechanism. A user comment' reply social network was constructed using the comments of a COVID-19 vaccine video on YouTube. The semantic similarity and Exponential Random Graph Model (ERGM) were used to calculate TECE and the effect of three interaction mechanisms on the echo chamber. The results show that there is a weak echo chamber effect (ECE) in the spread of misinformation about the COVID-19 vaccine. The imitation and intergroup interaction behavior are positively related to TECE. 
Reciprocity has no significant influence on TECE.}, DOI = {10.3390/ijerph192315869} } @article{Grusauskaite2023, author = {Kamile Grusauskaite and Luca Carbone and Jaron Harambam and Stef Aupers}, title ={Debating (in) echo chambers: How culture shapes communication in conspiracy theory networks on YouTube}, journal = {New Media \& Society}, volume = {0}, number = {0}, pages = {14614448231162585}, year = {2023}, doi = {10.1177/14614448231162585}, URL = { https://doi.org/10.1177/14614448231162585 }, eprint = { https://doi.org/10.1177/14614448231162585 } , abstract = { The ubiquity of social media platforms fuels heated discussions about algorithms and selection biases leading people into online “echo chambers.” Scholars argue that social media deepen societal polarization and fuel political extremism. However, studies often focus on media effects, disregarding individual agency and (sub)cultural values that shape communication. As a strategic case study, this article, based on a mixed-methods analysis, including a social network and qualitative analysis of 1199 comments under four conspiracy theory comment sections on YouTube, questions how insular these spaces are? And how people in these networks communicate? We find that the discussions in our strategically sampled comments sections lie between homogeneous closed debates and open debates. In other words, the networks in our sample vary in their “echo chamberness.” Based on our findings, we contend that variations in the echo chamberness of the various comment sections can be explained via the lens of conspiratorial (sub)cultures. } } @article{gao2023echo, author = {Gao, Y. and Liu, F. 
and Gao, L.}, title = {Echo chamber effects on short video platforms}, journal = {Sci Rep}, volume = {13}, pages = {6282}, year = {2023}, doi = {10.1038/s41598-023-33370-1}, } @article{koch, author = {Natalie Koch}, title = {The problem with rallying around the (Ukrainian) flag}, journal = {Space and Polity}, volume = {0}, number = {0}, pages = {1-5}, year = {2023}, publisher = {Routledge}, doi = {10.1080/13562576.2023.2223129} } @inproceedings{Salamat-disentaglement, author = {Salamat, Sara and Arabzadeh, Negar and Seyedsalehi, Shirin and Bigdeli, Amin and Zihayat, Morteza and Bagheri, Ebrahim}, title = {Neural Disentanglement of Query Difficulty and Semantics}, year = {2023}, isbn = {9798400701245}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3583780.3615189}, doi = {10.1145/3583780.3615189}, abstract = {Researchers have shown that the retrieval effectiveness of queries may depend on other factors in addition to the semantics of the query. In other words, several queries expressed with the same intent, and even using overlapping keywords, may exhibit completely different degrees of retrieval effectiveness. As such, the objective of our work in this paper is to propose a neural disentanglement method that is able to disentangle query semantics from query difficulty. The disentangled query semantics representation provides the means to determine semantic association between queries whereas the disentangled query difficulty representation would allow for the estimation of query effectiveness. 
We show through our experiments on the query performance prediction; and, query similarity calculation tasks that our proposed disentanglement method is able to show better performance compared to the state of the art.}, booktitle = {Proceedings of the 32nd ACM International Conference on Information and Knowledge Management}, pages = {4264–4268}, numpages = {5}, keywords = {disentanglement, query performance prediction, information retrieval}, location = {Birmingham, United Kingdom}, series = {CIKM '23} } @article{Coletto-SXSW, title={Automatic controversy detection in social media: A content-independent motif-based approach}, author={Mauro Coletto and Venkata Rama Kiran Garimella and A. Gionis and Claudio Lucchese}, journal={Online Soc. Networks Media}, year={2017}, volume={3-4}, pages={22-31}, url={https://api.semanticscholar.org/CorpusID:54300115} } @mastersthesis{ghafouri-thesis, author = {Ghafouri, Vahid and RezaeeDaryakenari, Babak and Kasap, Nihat}, title = {Who rallies around the flag? Analyzing the impact of foreign interventions on nations' political stance using social media data}, year = {2020}, school = {Sabancı University}, type = {Master's Thesis}, note = {[Thesis]}, url = {https://risc01.sabanciuniv.edu/record=b2473816}, } @inproceedings{He2023migration, author = {He, Jiahui and Zia, Haris Bin and Castro, Ignacio and Raman, Aravindh and Sastry, Nishanth and Tyson, Gareth}, title = {Flocking to Mastodon: Tracking the Great Twitter Migration}, year = {2023}, isbn = {9798400703829}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3618257.3624819}, doi = {10.1145/3618257.3624819}, abstract = {The acquisition of Twitter by Elon Musk has spurred controversy and uncertainty among Twitter users. The move raised both praise and concerns, particularly regarding Musk's views on free speech. As a result, a large number of Twitter users have looked for alternatives to Twitter. 
Mastodon, a decentralized micro-blogging social network, has attracted the attention of many users and the general media. In this paper, we analyze the migration of 136,009 users from Twitter to Mastodon. We inspect the impact that this has on the wider Mastodon ecosystem, particularly in terms of user-driven pressure towards centralization. We further explore factors that influence users to migrate, highlighting the effect of users' social networks. Finally, we inspect the behavior of individual users, showing how they utilize both Twitter and Mastodon in parallel. We find a clear difference in the topics discussed on the two platforms. This leads us to build classifiers to explore if migration is predictable. Through feature analysis, we find that the content of tweets as well as the number of URLs, the number of likes, and the length of tweets are effective metrics for the prediction of user migration.}, booktitle = {Proceedings of the 2023 ACM on Internet Measurement Conference}, pages = {111–123}, numpages = {13}, keywords = {user migration, twitter, topic modeling, mastodon, machine learning}, location = {Montreal QC, Canada}, series = {IMC '23} } @article{Slater2018nonwestpol, title={Polarizing Figures: Executive Power and Institutional Conflict in Asian Democracies}, author={D. Slater and A. Arugay}, journal={American Behavioral Scientist}, year={2018}, volume={62}, pages={106 - 92}, doi={10.1177/0002764218759577} } @article{Abramowitz2010nonwestpol, title={The Disappearing Center: Engaged Citizens, Polarization, and American Democracy}, author={A. Abramowitz}, year={2010}, doi={10.5860/choice.48-1737} } @article{Ribberink2018ReligiousPol, title={Religious polarization: contesting religion in secularized Western European countries}, author={Egbert Ribberink and P. Achterberg and D. 
Houtman}, journal={Journal of Contemporary Religion}, year={2018}, volume={33}, pages={209 - 227}, doi={10.1080/13537903.2018.1469262} } @article{jacomy2014forcedatlas2, doi = {10.1371/journal.pone.0098679}, author = {Jacomy, Mathieu AND Venturini, Tommaso AND Heymann, Sebastien AND Bastian, Mathieu}, journal = {PLOS ONE}, publisher = {Public Library of Science}, title = {ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software}, year = {2014}, month = {06}, volume = {9}, url = {https://doi.org/10.1371/journal.pone.0098679}, pages = {1-12}, abstract = {Gephi is a network visualization software used in various disciplines (social network analysis, biology, genomics…). One of its key features is the ability to display the spatialization process, aiming at transforming the network into a map, and ForceAtlas2 is its default layout algorithm. The latter is developed by the Gephi team as an all-around solution to Gephi users’ typical networks (scale-free, 10 to 10,000 nodes). We present here for the first time its functioning and settings. ForceAtlas2 is a force-directed layout close to other algorithms used for network spatialization. We do not claim a theoretical advance but an attempt to integrate different techniques such as the Barnes Hut simulation, degree-dependent repulsive force, and local and global adaptive temperatures. It is designed for the Gephi user experience (it is a continuous algorithm), and we explain which constraints it implies. The algorithm benefits from much feedback and is developed in order to provide many possibilities through its settings. We lay out its complete functioning for the users who need a precise understanding of its behaviour, from the formulas to graphic illustration of the result. We propose a benchmark for our compromise between performance and quality. 
We also explain why we integrated its various features and discuss our design choices.}, number = {6}, } @article{Bailon2023FacebookPolarization, author = {Sandra González-Bailón and David Lazer and Pablo Barberá and Meiqing Zhang and Hunt Allcott and Taylor Brown and Adriana Crespo-Tenorio and Deen Freelon and Matthew Gentzkow and Andrew M. Guess and Shanto Iyengar and Young Mie Kim and Neil Malhotra and Devra Moehler and Brendan Nyhan and Jennifer Pan and Carlos Velasco Rivera and Jaime Settle and Emily Thorson and Rebekah Tromble and Arjun Wilkins and Magdalena Wojcieszak and Chad Kiewiet de Jonge and Annie Franco and Winter Mason and Natalie Jomini Stroud and Joshua A. Tucker }, title = {Asymmetric ideological segregation in exposure to political news on Facebook}, journal = {Science}, volume = {381}, number = {6656}, pages = {392-398}, year = {2023}, doi = {10.1126/science.ade7138}, URL = {https://www.science.org/doi/abs/10.1126/science.ade7138}, eprint = {https://www.science.org/doi/pdf/10.1126/science.ade7138}, abstract = {Does Facebook enable ideological segregation in political news consumption? We analyzed exposure to news during the US 2020 election using aggregated data for 208 million US Facebook users. We compared the inventory of all political news that users could have seen in their feeds with the information that they saw (after algorithmic curation) and the information with which they engaged. We show that (i) ideological segregation is high and increases as we shift from potential exposure to actual exposure to engagement; (ii) there is an asymmetry between conservative and liberal audiences, with a substantial corner of the news ecosystem consumed exclusively by conservatives; and (iii) most misinformation, as identified by Meta’s Third-Party Fact-Checking Program, exists within this homogeneously conservative corner, which has no equivalent on the liberal side. 
Sources favored by conservative audiences were more prevalent on Facebook’s news ecosystem than those favored by liberals.} } @article{Tornberg2018misinformation, title={Echo chambers and viral misinformation: Modeling fake news as complex contagion}, author={P. Törnberg}, journal={PLoS ONE}, year={2018}, volume={13}, doi={10.1371/journal.pone.0203958} } @article{Treviranus2009criticalthinking, title={The value of the unpopular: Counteracting the popularity echo-chamber on the Web},author={J. Treviranus and S. Hockema},journal={2009 IEEE Toronto International Conference Science and Technology for Humanity (TIC-STH)},year={2009},pages={603-608},doi={10.1109/TIC-STH.2009.5444430} } @article{Brugnoli2019Recursive, title={Recursive patterns in online echo chambers},author={Emanuele Brugnoli and Matteo Cinelli and W. Quattrociocchi and Antonio Scala},journal={Scientific Reports},year={2019},volume={9},doi={10.1038/s41598-019-56191-7}} @article{Wang2021Covid, title={Echo Chamber Effect in Rumor Rebuttal Discussions About COVID-19 in China: Social Media Content and Network Analysis Study}, author={Dandan Wang and Yuxing Qian}, journal={Journal of Medical Internet Research}, year={2021}, volume={23}, doi={10.2196/27009} } @article{langer2022gender, title={Gender is a complex number and the case for trans phantoms}, author={Langer, SJ}, journal={Studies in Gender and Sexuality}, volume={23}, number={2}, pages={136--145}, year={2022}, publisher={Taylor \& Francis} } @article{pieterse1997deconstructing, title={Deconstructing/reconstructing ethnicity}, author={Pieterse, Jan Nederveen}, journal={Nations and Nationalism}, volume={3}, number={3}, pages={365--395}, year={1997}, publisher={Wiley Online Library} } @article{Wang2023taiwan, author = {Austin Horng-En Wang and Yao-Yuan Yeh and Charles K.S. 
Wu and Fang-Yu Chen}, title ={Why Does Taiwan Identity Decline?}, journal = {Journal of Asian and African Studies}, pages = {00219096231168068}, year = {2023}, doi = {10.1177/00219096231168068}, abstract = { Since 1992, the percentage of Taiwanese identifying as “Taiwanese only” increased by 50\%. The literature explains the increase by generation, democratization, and military threat. None of these foresees the decline of Taiwan identity between 2016 and 2018. We argue that the decline can be explained by issue ownership+hedging. After the Democratic Progress Party (DPP) won both the presidency and the Congress for the first time in 2016, DPP’s performance was used by voters to evaluate the utility of Taiwan identity. Propensity score matching and regressions on three groups of surveys (TEDS, TISS, and TNSS) support the theory and rule out alternative explanations. } } @article{asik2024secularism, author = {Ozan Aşık}, title ={Ideology, Polarization, and News Culture: The Secular-Islamist Tension in Turkish Journalism}, journal = {The International Journal of Press/Politics}, volume = {29}, number = {2}, pages = {530-547}, year = {2024}, doi = {10.1177/19401612221132716}, URL = { https://doi.org/10.1177/19401612221132716 }, eprint = { https://doi.org/10.1177/19401612221132716 } , abstract = { What role does political ideology play in the production of news in a contentious cultural context? To address this question, this article investigates how Turkish Islamic conservative journalists produced and circulated representations of two dramatic uprisings in 2013: the Gezi Park protests in Turkey and the military coup in Egypt. I chose these two cases because the Islamic political bias and activism that shaped the production of news about these two events are symptomatic of the way in which Islamism as a political ideology instrumentalizes news making. 
Based on newsroom ethnography conducted at an Islamic national mainstream television channel in Turkey between 2011 and 2014, the article demonstrates how Islamism shapes the ways in which Islamic conservative journalists interpreted and articulated the two events in the newsroom, and represented them in news coverage. In this context, journalistic practice gains an ideological character when the journalists utilize journalistic representations as strategic instruments to advance the political agenda of Islamic conservatives against secular forces in Turkey. As the polarization between Islamic and secular groups is based on cultural distinctions, I argue that the political ideology determining journalistic practices is defined not only by party affiliations or socioeconomic class positions but also by the common cultural ways of living and thinking of journalists who work and live as members of a sociocultural group. Islamic ideology serves as a social cement that creates bonds among the IslamicTV journalists as a sociocultural group, and a degree of unity and common purpose in their professional practices. } } @article{Tabaar2020iranvsturkey, author = {Ayatollahi Tabaar, Mohammad and Yildirim, A.Kadir}, title = "{Religious Parties and Ideological Change: A Comparison of Iran and Turkey}", journal = {Political Science Quarterly}, volume = {135}, number = {4}, pages = {697-723}, year = {2020}, month = {08}, abstract = "{RELIGIOUS PARTIES AND THEIR IDEOLOGIES have captured the imagination of academic scholarship and public discussion since the 1980s. Specifically, Islamist parties have become a focal point as they entered the electoral politics of many Middle Eastern countries. Much of this focus is devoted to how such parties engage in ideological moderation—a legitimate academic concern with important practical implications for democratic governance, pluralism, and violence. 
Observers often treat religious ideology either as a fixed attribute on one extreme, or as an entirely malleable and instrumental feature on the other. For example, the Turkish president and leader of the Justice and Development Party (AKP), Recep Tayyip Erdoğan, has been criticized since his rise to political prominence in the mid-1990s for being too ideologically rigid and threatening secularism in Turkey, gradually moving the country closer to a theocracy.1 Simultaneously, Erdoğan is characterized as a political opportunist who has no ideological commitments and exploits religion with reckless abandon to serve his own political interests.2 Yet these two seemingly conflicting views are not incompatible.}", issn = {0032-3195}, doi = {10.1002/polq.13097}, url = {https://doi.org/10.1002/polq.13097}, eprint = {https://academic.oup.com/psq/article-pdf/135/4/697/48808715/psquar\_135\_4\_697.pdf}, } @article{Azmanova2011leftright, author = {Azmanova, Albena}, title = "{After the Left–Right (Dis)continuum: Globalization and the Remaking of Europe's Ideological Geography}", journal = {International Political Sociology}, volume = {5}, number = {4}, pages = {384-407}, year = {2011}, month = {12}, abstract = "{This article examines the status of globalization as a causal factor in political mobilization and proposes a research agenda for diagnosing the impact of global socio-economic dynamics on ideological orientation in national polities. Focusing on Europe's established democracies, the article outlines recent shifts in Europe's ideological landscape and explores the mechanisms generating a new pattern of political conflict and electoral competition. It advances the hypothesis that the knowledge economy of open borders has brought about a political cleavage intimately linked to citizens’ perceptions of the social impact of global economic integration. 
In this context, the polarization of life chances is determined by institutionally mediated exposure to both the economic opportunities and the hazards of globalization. Fostered by the increasing relevance of the international for state-bound publics, new fault-lines of social conflict are emerging, giving shape to a new, “opportunity-risk,” axis of political competition. As the novel political cleavage challenges the conventional left–right divide, it is likely to radically alter Europe's ideological geography.}", issn = {1749-5679}, doi = {10.1111/j.1749-5687.2011.00141.x}, } @book{pariser2011filter, title={The filter bubble: How the new personalized web is changing what we read and how we think}, author={Pariser, Eli}, year={2011}, publisher={Penguin} } @inproceedings{hada2023beyond, title={Beyond Digital" Echo Chambers": The Role of Viewpoint Diversity in Political Discussion}, author={Hada, Rishav and Ebrahimi Fard, Amir and Shugars, Sarah and Bianchi, Federico and Rossini, Patricia and Hovy, Dirk and Tromble, Rebekah and Tintarev, Nava}, booktitle={WSDM}, year={2023} } @article{balietti2021reducing, title={Reducing opinion polarization: Effects of exposure to similar people with differing political views}, author={Balietti, Stefano and Getoor, Lise and Goldstein, Daniel G and Watts, Duncan J}, journal={National Academy of Sciences}, volume={118}, number={52}, year={2021}, publisher={National Acad Sciences} } @article{lorentzen2021bridging, title={Bridging polarised Twitter discussions: the interactions of the users in the middle}, author={Lorentzen, David Gunnarsson}, journal={Aslib Journal of Information Management}, volume={73}, number={2}, year={2021}, publisher={Emerald Publishing Limited} } @inproceedings{an2024curated, title={Curated and Asymmetric Exposure: A Case Study of Partisan Talk during COVID on Twitter}, author={An, Zijian and Breuhaus, Jessica and Niu, Jason and Sariyuce, A Erdem and Joseph, Kenneth}, booktitle={ICWSM}, year={2024} } 
% ---------------------------------------------------------------------------
% Entries hanley2023twits .. emmert2019high.
% Layout: one field per line; names as `Last, First` with spaced, dotted
% initials; proper nouns and acronyms in titles are brace-protected so that
% sentence-casing styles do not lower-case them.
% ---------------------------------------------------------------------------

% arXiv preprint: the identifier is carried in eprint/archiveprefix; journal
% is kept so that styles which ignore eprint still print the identifier.
@article{hanley2023twits,
  author        = {Hanley, Hans W. A. and Durumeric, Zakir},
  title         = {Twits, toxic tweets, and tribal tendencies: Trends in politically polarized posts on {Twitter}},
  journal       = {arXiv preprint arXiv:2307.10349},
  year          = {2023},
  eprint        = {2307.10349},
  archiveprefix = {arXiv},
}

@article{zade2024reply,
  author  = {Zade, Himanshu and Williams, Spencer and Tran, Theresa T. and Smith, Christina and Venkatagiri, Sukrit and Hsieh, Gary and Starbird, Kate},
  title   = {To reply or to quote: Comparing conversational framing strategies on {Twitter}},
  journal = {ACM Journal on Computing and Sustainable Societies},
  year    = {2024},
}

@article{marchal2022nice,
  author    = {Marchal, Nahema},
  title     = {``Be nice or leave me alone'': An intergroup perspective on affective polarization in online political discussions},
  journal   = {Communication Research},
  volume    = {49},
  number    = {3},
  year      = {2022},
  publisher = {SAGE Publications},
  address   = {Los Angeles, CA},
}

@inproceedings{garimella2016quote,
  author    = {Garimella, Kiran and Weber, Ingmar and De Choudhury, Munmun},
  title     = {Quote {RTs} on {Twitter}: Usage of the new feature for political discourse},
  booktitle = {ACM Conference on Web Science},
  year      = {2016},
}

@article{shugars2019keep,
  author  = {Shugars, Sarah and Beauchamp, Nicholas},
  title   = {Why keep arguing? Predicting engagement in political conversations online},
  journal = {SAGE Open},
  year    = {2019},
}

@article{yardi2010dynamic,
  author    = {Yardi, Sarita and Boyd, Danah},
  title     = {Dynamic debates: An analysis of group polarization over time on {Twitter}},
  journal   = {Bulletin of Science, Technology \& Society},
  volume    = {30},
  number    = {5},
  year      = {2010},
  publisher = {SAGE Publications},
  address   = {Los Angeles, CA},
}

@article{flaxman2016filter,
  author    = {Flaxman, Seth and Goel, Sharad and Rao, Justin M.},
  title     = {Filter bubbles, echo chambers, and online news consumption},
  journal   = {Public Opinion Quarterly},
  volume    = {80},
  number    = {S1},
  year      = {2016},
  publisher = {Oxford University Press},
}

@book{sunstein2001republic,
  author    = {Sunstein, Cass R.},
  title     = {Republic.com},
  year      = {2001},
  publisher = {Princeton University Press},
}

@article{barbera2015birds,
  author    = {Barber{\'a}, Pablo},
  title     = {Birds of the same feather tweet together: {Bayesian} ideal point estimation using {Twitter} data},
  journal   = {Political Analysis},
  volume    = {23},
  number    = {1},
  year      = {2015},
  publisher = {Cambridge University Press},
}

@article{bakshy2015exposure,
  author    = {Bakshy, Eytan and Messing, Solomon and Adamic, Lada A.},
  title     = {Exposure to ideologically diverse news and opinion on {Facebook}},
  journal   = {Science},
  volume    = {348},
  number    = {6239},
  year      = {2015},
  publisher = {American Association for the Advancement of Science},
}

@article{stroud2010polarization,
  author    = {Stroud, Natalie Jomini},
  title     = {Polarization and partisan selective exposure},
  journal   = {Journal of Communication},
  volume    = {60},
  number    = {3},
  year      = {2010},
  publisher = {Oxford University Press},
}

@article{dubois2018echo,
  author    = {Dubois, Elizabeth and Blank, Grant},
  title     = {The echo chamber is overstated: the moderating effect of political interest and diverse media},
  journal   = {Information, Communication \& Society},
  volume    = {21},
  number    = {5},
  year      = {2018},
  publisher = {Taylor \& Francis},
}

% NOTE(review): `journal` looks like a funder name rather than a publication
% venue -- confirm the actual venue (and entry type) before relying on it.
@article{guess2018selective,
  author  = {Guess, Andrew and Nyhan, Brendan and Reifler, Jason},
  title   = {Selective exposure to misinformation: Evidence from the consumption of fake news during the 2016 {US} presidential campaign},
  journal = {European Research Council},
  volume  = {9},
  number  = {3},
  year    = {2018},
}

@inproceedings{bruns2017echo,
  author    = {Bruns, Axel},
  title     = {Echo chamber? What echo chamber? Reviewing the evidence},
  booktitle = {FOJ '17},
  year      = {2017},
}

@article{gentzkow2011ideological,
  author  = {Gentzkow, Matthew and Shapiro, Jesse M.},
  title   = {Ideological segregation online and offline},
  journal = {The Quarterly Journal of Economics},
  year    = {2011},
}

@article{bail2018exposure,
  author    = {Bail, Christopher A. and Argyle, Lisa P. and Brown, Taylor W. and Bumpus, John P. and Chen, Haohan and Hunzaker, M. B. Fallin and Lee, Jaemin and Mann, Marcus and Merhout, Friedolin and Volfovsky, Alexander},
  title     = {Exposure to opposing views on social media can increase political polarization},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {115},
  number    = {37},
  year      = {2018},
  publisher = {National Academy of Sciences},
}

@inproceedings{elmas2021can,
  author    = {Elmas, Tu{\u{g}}rulcan and Hardi, Kristina and Overdorf, Rebekah and Aberer, Karl},
  title     = {Can Celebrities Burst Your Bubble?},
  booktitle = {MISINFO},
  year      = {2021},
}

% Corporate author: the extra brace pair keeps the name as one unit.
% NOTE(review): the key says 2020 but `year` is 2024 -- confirm which is meant.
@misc{team2020archive,
  author = {{The Internet Archive}},
  title  = {The {Twitter} stream grab},
  year   = {2024},
}

% Chapter in an edited volume: the volume title is the booktitle.
@incollection{dutta2019did,
  author    = {Dutta, Subhabrata and Chakraborty, Tanmoy and Das, Dipankar},
  title     = {How did the discussion go: Discourse act classification in social media conversations},
  booktitle = {Linking and Mining Heterogeneous and Multi-view Data},
  pages     = {137--160},
  year      = {2019},
  publisher = {Springer},
}

@article{wojcieszak2009online,
  author    = {Wojcieszak, Magdalena E. and Mutz, Diana C.},
  title     = {Online groups and political discourse: Do online discussion spaces facilitate exposure to political disagreement?},
  journal   = {Journal of Communication},
  volume    = {59},
  number    = {1},
  year      = {2009},
  publisher = {Oxford University Press},
}

@inproceedings{lees2022new,
  author    = {Lees, Alyssa and Tran, Vinh Q. and Tay, Yi and Sorensen, Jeffrey and Gupta, Jai and Metzler, Donald and Vasserman, Lucy},
  title     = {A new generation of {Perspective} {API}: Efficient multilingual character-level transformers},
  booktitle = {ACM SIGKDD},
  year      = {2022},
}

@article{karande2021stance,
  author    = {Karande, Hema and Walambe, Rahee and Benjamin, Victor and Kotecha, Ketan and Raghu, T. S.},
  title     = {Stance detection with {BERT} embeddings for credibility analysis of information on social media},
  journal   = {PeerJ Computer Science},
  volume    = {7},
  year      = {2021},
  publisher = {PeerJ Inc.},
}

@inproceedings{hutto2014vader,
  author    = {Hutto, Clayton and Gilbert, Eric},
  title     = {{VADER}: A parsimonious rule-based model for sentiment analysis of social media text},
  booktitle = {ICWSM},
  year      = {2014},
}

@article{rajadesingan2021walking,
  author  = {Rajadesingan, Ashwin and Duran, Carolyn and Resnick, Paul and Budak, Ceren},
  title   = {`Walking Into a Fire Hoping You Don't Catch': Strategies and Designs to Facilitate Cross-Partisan Online Discussions},
  journal = {ACMHCI},
  year    = {2021},
}

@article{lyu2023cross,
  author    = {Lyu, Zeyu},
  title     = {Cross-cutting interaction, inter-party hostility, and partisan identity: Analysis of offensive speech in social media},
  journal   = {New Media \& Society},
  year      = {2023},
  publisher = {SAGE Publications},
  address   = {London, England},
}

@article{klein2024medium,
  author    = {Klein, Mark and Majdoubi, Nouhayla},
  title     = {The medium is the message: toxicity declines in structured vs unstructured online deliberations},
  journal   = {World Wide Web},
  volume    = {27},
  number    = {3},
  pages     = {31},
  year      = {2024},
  publisher = {Springer},
}

@article{santana2014virtuous,
  author    = {Santana, Arthur D.},
  title     = {Virtuous or vitriolic: The effect of anonymity on civility in online newspaper reader comment boards},
  journal   = {Journalism Practice},
  volume    = {8},
  number    = {1},
  pages     = {18--33},
  year      = {2014},
  publisher = {Taylor \& Francis},
}

@inproceedings{wu2021cross,
  author    = {Wu, Siqi and Resnick, Paul},
  title     = {Cross-Partisan Discussions on {YouTube}: Conservatives Talk to Liberals but Liberals Don't Talk to Conservatives},
  booktitle = {ICWSM},
  year      = {2021},
}

@inproceedings{garimella2018political,
  author    = {Garimella, Kiran and De Francisci Morales, Gianmarco and Gionis, Aristides and Mathioudakis, Michael},
  title     = {Political discourse on social media: Echo chambers, gatekeepers, and the price of bipartisanship},
  booktitle = {WWW},
  year      = {2018},
}

@inproceedings{torres2022manufacture,
  author    = {Torres-Lugo, Christopher and Yang, Kai-Cheng and Menczer, Filippo},
  title     = {The manufacture of partisan echo chambers by follow train abuse on {Twitter}},
  booktitle = {ICWSM},
  year      = {2022},
}

@article{hobolt2023polarizing,
  author    = {Hobolt, Sara B. and Lawall, Katharina and Tilley, James},
  title     = {The polarizing effect of partisan echo chambers},
  journal   = {American Political Science Review},
  year      = {2023},
  publisher = {Cambridge University Press},
}

@inproceedings{kumar2021designing,
  author    = {Kumar, Deepak and Kelley, Patrick Gage and Consolvo, Sunny and Mason, Joshua and Bursztein, Elie and Durumeric, Zakir and Thomas, Kurt and Bailey, Michael},
  title     = {Designing toxic content classification for a diversity of perspectives},
  booktitle = {SOUPS '21},
  year      = {2021},
}

@article{santoro2022promise,
  author    = {Santoro, Erik and Broockman, David E.},
  title     = {The promise and pitfalls of cross-partisan conversations for reducing affective polarization: Evidence from randomized experiments},
  journal   = {Science Advances},
  volume    = {8},
  number    = {25},
  year      = {2022},
  publisher = {American Association for the Advancement of Science},
}

% arXiv preprint: the identifier is carried in eprint/archiveprefix; journal
% is kept so that styles which ignore eprint still print the identifier.
@article{grootendorst2022bertopic,
  author        = {Grootendorst, Maarten},
  title         = {{BERTopic}: Neural topic modeling with a class-based {TF-IDF} procedure},
  journal       = {arXiv preprint arXiv:2203.05794},
  year          = {2022},
  eprint        = {2203.05794},
  archiveprefix = {arXiv},
}

@article{tibshirani1996regression,
  author    = {Tibshirani, Robert},
  title     = {Regression shrinkage and selection via the lasso},
  journal   = {Journal of the Royal Statistical Society Series B: Statistical Methodology},
  volume    = {58},
  number    = {1},
  year      = {1996},
  publisher = {Oxford University Press},
}

@article{emmert2019high,
  author    = {Emmert-Streib, Frank and Dehmer, Matthias},
  title     = {High-dimensional {LASSO}-based computational regression models: regularization, shrinkage, and selection},
  journal   = {Machine Learning and Knowledge Extraction},
  volume    = {1},
  number    = {1},
  year      = {2019},
  publisher = {MDPI},
}
@article{friedman2010regularization,
  author    = {Friedman, Jerome and Hastie, Trevor and Tibshirani, Rob},
  title     = {Regularization paths for generalized linear models via coordinate descent},
  journal   = {Journal of Statistical Software},
  volume    = {33},
  number    = {1},
  year      = {2010},
  publisher = {NIH Public Access}
}

% Stance, sentiment and LLM-based annotation.

@article{Lai2019italystance,
  author  = {Lai, Mirko and Tambuscio, Marcella and Patti, Viviana and Ruffo, Giancarlo and Rosso, Paolo},
  title   = {Stance polarity in political debates: A diachronic perspective of network homophily and conversations on {Twitter}},
  journal = {Data \& Knowledge Engineering},
  volume  = {124},
  year    = {2019}
}

@article{Matalon2021israelsentiment,
  author  = {Matalon, Yael and Magdaci, Oren and Almozlino, Ariel and Yamin, Dan},
  title   = {Using sentiment analysis to predict opinion inversion in Tweets of political communication},
  journal = {Scientific Reports},
  year    = {2021}
}

@inproceedings{Zhu2023chatgptannotation,
  author    = {Zhu, Yiming and Zhang, Peixian and Haq, Ehsan-Ul and Hui, Pan and Tyson, Gareth},
  title     = {Can {ChatGPT} reproduce human-generated labels? A study of social computing tasks},
  booktitle = {ASONAM '23},
  year      = {2023},
  month     = apr,
  day       = {20}
}

@article{Lan2024LLMstance,
  author  = {Lan, Xiaochong and Gao, Chen and Jin, Depeng and Li, Yong},
  title   = {Stance Detection with Collaborative Role-Infused {LLM}-Based Agents},
  journal = {ICWSM},
  volume  = {18},
  number  = {1},
  year    = {2024},
  month   = may
}

@book{gelman2007data,
  author    = {Gelman, Andrew},
  title     = {Data analysis using regression and multilevel/hierarchical models},
  year      = {2007},
  publisher = {Cambridge University Press}
}

@article{ccetinkaya2024towards,
  author  = {{\c{C}}etinkaya, Yusuf M{\"u}cahit and Lee, Yeonjung and K{\"u}lah, Emre and Toroslu, {\.I}smail Hakk{\i} and Cowan, Michael A and Davulcu, Hasan},
  title   = {Towards a Programmable Humanizing {AI} through Scalable Stance-Directed Architecture},
  journal = {IEEE Internet Computing},
  year    = {2024}
}

% Bots and follower markets.

@inproceedings{echeverri2018lobo,
  author    = {Echeverr{\'\i}a, Juan and De Cristofaro, Emiliano and Kourtellis, Nicolas and Leontiadis, Ilias and Stringhini, Gianluca and Zhou, Shi},
  title     = {{LOBO}: Evaluation of generalization deficiencies in {Twitter} bot classifiers},
  booktitle = {34th Annual Computer Security Applications Conference},
  year      = {2018}
}

@inproceedings{stringhini2013follow,
  author    = {Stringhini, Gianluca and Wang, Gang and Egele, Manuel and Kruegel, Christopher and Vigna, Giovanni and Zheng, Haitao and Zhao, Ben Y},
  title     = {Follow the green: growth and dynamics in {Twitter} follower markets},
  booktitle = {IMC},
  year      = {2013}
}

% Transparency, ethics and reproducibility checklists.

@article{aczel2020consensus,
  author    = {Aczel, Balazs and Szaszi, Barnabas and Sarafoglou, Alexandra and Kekecs, Zoltan and Kucharsk{\`y}, {\v{S}}imon and Benjamin, Daniel and Chambers, Christopher D and Fisher, Agneta and Gelman, Andrew and Gernsbacher, Morton A and others},
  title     = {A consensus-based transparency checklist},
  journal   = {Nature Human Behaviour},
  volume    = {4},
  number    = {1},
  pages     = {4--6},
  year      = {2020},
  publisher = {Nature Publishing Group},
  address   = {London, UK}
}

@article{gebru2021datasheets,
  author    = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e}, III, Hal and Crawford, Kate},
  title     = {Datasheets for datasets},
  journal   = {Communications of the ACM},
  volume    = {64},
  number    = {12},
  pages     = {86--92},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY, USA}
}

@misc{ashurst2020guide,
  author       = {Ashurst, Carolyn and Anderljung, Markus and Prunkl, Carina and Leike, Jan and Gal, Yarin and Shevlane, Toby and Dafoe, Allan},
  title        = {A guide to writing the {NeurIPS} impact statement},
  howpublished = {Centre for the Governance of AI. \url{https://perma.cc/B5R8-2B9V}},
  year         = {2020}
}

% benotti2023understanding is defined later in this file with its full
% proceedings title and page range, so the abbreviated copy that stood here
% is omitted to avoid a repeated-entry error.

@misc{neurips,
  author       = {{NeurIPS}},
  title        = {{NeurIPS} 2021 Paper Checklist Guidelines},
  howpublished = {\url{https://neurips.cc/Conferences/2021/PaperInformation/PaperChecklist}},
  year         = 2021
}

@misc{fair,
  author       = {{FORCE11}},
  title        = {The {FAIR} Data principles},
  howpublished = {\url{https://force11.org/info/the-fair-data-principles/}},
  year         = 2020
}

% Template sample entries. Their fully braced titles are kept so the
% bibliography style cannot recase them.

@book{em:86,
  editor    = {Engelmore, Robert and Morgan, Anthony},
  title     = {Blackboard Systems},
  year      = 1986,
  address   = {Reading, Mass.},
  publisher = {Addison-Wesley}
}

% The second, identical definition of aczel2020consensus that stood here is
% omitted; the definition earlier in this block is the one kept.

@inproceedings{c:83,
  author    = {Clancey, William J.},
  title     = {{Communication, Simulation, and Intelligent Agents: Implications of Personal Intelligent Machines for Medical Education}},
  booktitle = {Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}},
  pages     = {556--560},
  year      = 1983,
  address   = {Menlo Park, Calif},
  publisher = {{IJCAI Organization}}
}

@inproceedings{c:84,
  author    = {Clancey, William J.},
  title     = {{Classification Problem Solving}},
  booktitle = {Proceedings of the Fourth National Conference on Artificial Intelligence},
  pages     = {45--54},
  year      = 1984,
  address   = {Menlo Park, Calif.},
  publisher = {AAAI Press}
}

@article{r:80, author = {Robinson,
Arthur L.}, title = {New Ways to Make Microcircuits Smaller}, volume = {208}, number = {4447}, pages = {1019--1022}, year = {1980}, doi = {10.1126/science.208.4447.1019}, publisher = {American Association for the Advancement of Science}, issn = {0036-8075}, URL = {https://science.sciencemag.org/content/208/4447/1019}, eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf}, journal = {Science}, }

@article{r:80x,
  author  = {Robinson, Arthur L.},
  title   = {{New Ways to Make Microcircuits Smaller---Duplicate Entry}},
  journal = {Science},
  volume  = 208,
  pages   = {1019--1026},
  year    = 1980
}

@article{hcr:83,
  author   = {Diane Warner Hasling and William J. Clancey and Glenn Rennels},
  title    = {Strategic explanations for a diagnostic consultation system},
  journal  = {International Journal of Man-Machine Studies},
  volume   = {20},
  number   = {1},
  pages    = {3--19},
  year     = {1984},
  issn     = {0020-7373},
  doi      = {10.1016/S0020-7373(84)80003-6},
  url      = {https://www.sciencedirect.com/science/article/pii/S0020737384800036},
  abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy.
We describe the aspects of NEOMYCIN that make abstract strategic explanations possible---the representation of strategic knowledge explicitly and separately from domain knowledge---and demonstrate how this representation can be used to generate explanations.} }

@article{hcrt:83,
  author  = {Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas},
  title   = {{Strategic Explanations in Consultation---Duplicate}},
  journal = {The International Journal of Man-Machine Studies},
  volume  = 20,
  number  = 1,
  pages   = {3--19},
  year    = 1983
}

@techreport{r:86,
  author      = {Rice, James},
  title       = {{Poligon: A System for Parallel Problem Solving}},
  type        = {Technical Report},
  number      = {KSL-86-19},
  institution = {Dept.\ of Computer Science, Stanford Univ.},
  year        = 1986
}

@phdthesis{c:79,
  author  = {Clancey, William J.},
  title   = {{Transfer of Rule-Based Expertise through a Tutorial Dialogue}},
  type    = {{Ph.D.} diss.},
  school  = {Dept.\ of Computer Science, Stanford Univ.},
  address = {Stanford, Calif.},
  year    = 1979
}

@unpublished{c:21,
  author = {Clancey, William J.},
  title  = {{The Engineering of Qualitative Models}},
  year   = 2021,
  note   = {Forthcoming}
}

@misc{c:22,
  author        = {Mathieu Bouville},
  title         = {Crime and punishment in scientific research},
  year          = {2008},
  eprint        = {0803.4058},
  archivePrefix = {arXiv},
  primaryClass  = {physics.soc-ph}
}

% gebru2021datasheets, ashurst2020guide, neurips and fair are already defined
% earlier in this file; their verbatim repeats that stood here are omitted to
% avoid repeated-entry errors.

@inproceedings{benotti2023understanding,
  author    = {Benotti, Luciana and Fort, Kar{\"e}n and Kan, Min-Yen and Tsvetkov, Yulia},
  title     = {Understanding Ethics in {NLP} Authoring and Reviewing},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
  pages     = {19--24},
  year      = {2023}
}

@misc{c:23,
  author       = {{NASA}},
  title        = {{Pluto}: The `Other' {Red Planet}},
  howpublished = {\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}},
  year         = 2015,
  note         = {Accessed: 2018-12-06}
}

%Entries

@article{Sharevski2021HeyAW, title={``Hey {Alexa}, What do You Know About the {COVID-19} Vaccine?''
- (Mis)perceptions of Mass Immunization Among Voice Assistant Users}, author={Filipo Sharevski and Anna Slowinski and Peter Jachim and Emma Pieroni}, journal={ArXiv}, year={2021}, volume={abs/2105.07854}, eprint={2105.07854}, archivePrefix={arXiv} }

% Language bias in online communities and word embeddings.

@article{ferrer2021tkde,
  author  = {Xavier Ferrer-Aran and Tom van Nuenen and Natalia Criado and Jose Such},
  title   = {Discovering and Interpreting Conceptual Biases in Online Communities},
  journal = {IEEE Transactions on Knowledge and Data Engineering (TKDE)},
  year    = {2021},
  doi     = {10.1109/TKDE.2021.3139680},
  url     = {https://www.computer.org/csdl/journal/tk/5555/01/09667280/1zMCh7YGvfi}
}

@inproceedings{ferrer2021discovering,
  author    = {Ferrer, Xavier and van Nuenen, Tom and Such, Jose and Criado, Natalia},
  title     = {Discovering and Categorising Language Biases in {Reddit}},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media (ICWSM)},
  volume    = {15},
  pages     = {140--151},
  year      = {2021}
}

@article{Caliskan2017,
  title    = {Semantics derived automatically from language corpora contain human-like biases},
  abstract = "Machine learning is a means to derive artificial intelligence by discovering patterns in existing data. Here, we show that applying machine learning to ordinary human language results in human-like semantic biases. We replicated a spectrum of known biases, as measured by the Implicit Association Test, using a widely used, purely statistical machine-learning model trained on a standard corpus of text from the World Wide Web. Our results indicate that text corpora contain recoverable and accurate imprints of our historic biases, whether morally neutral as toward insects or flowers, problematic as toward race or gender, or even simply veridical, reflecting the status quo distribution of gender with respect to careers or first names.
Our methods hold promise for identifying and addressing sources of bias in culture, including technology.",
  keywords  = {cs.AI, cs.CL, cs.CY, cs.LG},
  author    = {Caliskan, Aylin and Bryson, Joanna J. and Narayanan, Arvind},
  year      = {2017},
  month     = apr,
  day       = {14},
  doi       = {10.1126/science.aal4230},
  language  = {English},
  volume    = {356},
  number    = {6334},
  pages     = {183--186},
  journal   = {Science},
  issn      = {0036-8075},
  publisher = {American Association for the Advancement of Science}
}

@inproceedings{Bolukbasi2016,
  author    = {Bolukbasi, Tolga and Chang, Kai-Wei and Zou, James and Saligrama, Venkatesh and Kalai, Adam},
  title     = {Man is to Computer Programmer as Woman is to Homemaker? {Debiasing} Word Embeddings},
  year      = {2016},
  isbn      = {9781510838819},
  publisher = {Curran Associates Inc.},
  address   = {Red Hook, NY, USA},
  abstract  = {The blind application of machine learning runs the risk of amplifying biases present in data. Such a danger is facing us with word embedding, a popular framework to represent text data as vectors which has been used in many machine learning and natural language processing tasks. We show that even word embeddings trained on Google News articles exhibit female/male gender stereotypes to a disturbing extent. This raises concerns because their widespread use, as we describe, often tends to amplify these biases. Geometrically, gender bias is first shown to be captured by a direction in the word embedding. Second, gender neutral words are shown to be linearly separable from gender definition words in the word embedding. Using these properties, we provide a methodology for modifying an embedding to remove gender stereotypes, such as the association between the words receptionist and female, while maintaining desired associations such as between the words queen and female. Using crowd-worker evaluation as well as standard benchmarks, we empirically demonstrate that our algorithms significantly reduce gender bias in embeddings while preserving the its useful properties such as the ability to cluster related concepts and to solve analogy tasks. The resulting embeddings can be used in applications without amplifying gender bias.},
  booktitle = {Proceedings of the 30th International Conference on Neural Information Processing Systems},
  pages     = {4356--4364},
  numpages  = {9},
  location  = {Barcelona, Spain},
  series    = {NIPS'16}
}

@article{Ethayarajh2019,
  author        = {Kawin Ethayarajh and David Duvenaud and Graeme Hirst},
  title         = {Understanding Undesirable Word Embedding Associations},
  journal       = {CoRR},
  volume        = {abs/1908.06361},
  year          = {2019},
  url           = {http://arxiv.org/abs/1908.06361},
  archivePrefix = {arXiv},
  eprint        = {1908.06361},
  timestamp     = {Mon, 26 Aug 2019 13:20:40 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/abs-1908-06361.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
  pages         = {1696--1705}
}

@misc{Richter2019,
  author       = {Richter, Matthias},
  title        = {Comparing Word Embeddings},
  month        = may,
  year         = {2019},
  howpublished = {\url{https://towardsdatascience.com/comparing-word-embeddings-c2efd2455fe3}}
}

% Gender bias and stereotype threat.

@article{Kiefer2007,
  title   = {Implicit stereotypes and women's math performance: How implicit gender-math stereotypes influence women's susceptibility to stereotype threat},
  journal = {Journal of Experimental Social Psychology},
  volume  = {43},
  number  = {5},
  pages   = {825--832},
  year    = {2007},
  issn    = {0022-1031},
  doi     = {10.1016/j.jesp.2006.08.004},
  url     = {https://www.sciencedirect.com/science/article/pii/S0022103106001399},
  author  = {Amy K.
Kiefer and Denise Sekaquaptewa}, keywords = {Women, Math performance, Implicit associations, Stereotypes}, abstract = {This experiment examined the effects of implicit gender-math stereotyping and implicit gender and math identification on women's math performance under stereotype threat and reduced threat conditions. Results showed that of the three, only implicit gender-math stereotyping moderated stereotype threat effects on women's math performance: women who showed less implicit math-gender stereotyping showed the largest performance difference across experimental conditions. These results suggest that women's implicit associations between gender and math interact with situational cues to influence their math performance: women who implicitly associate women more than men with mathematics were most benefited by reduction of stereotype salience during testing.} }

@inproceedings{Babaeianjelodar2020,
  author    = {Babaeianjelodar, Marzieh and Lorenz, Stephen and Gordon, Josh and Matthews, Jeanna and Freitag, Evan},
  title     = {Quantifying Gender Bias in Different Corpora},
  year      = {2020},
  isbn      = {9781450370240},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3366424.3383559},
  doi       = {10.1145/3366424.3383559},
  abstract  = {Word embedding models have been shown to be effective in performing a wide variety of Natural Language Processing (NLP) tasks such as identifying audiences for web advertisements, parsing resum{\'e}s to select promising job candidates, and translating documents from one language to another. However, it has been demonstrated that NLP systems learn gender bias from the corpora of documents on which they are trained. It is increasingly common for pre-trained models to be used as a starting point for building applications in a wide range of areas including critical decision making applications. It is also very easy to use a pre-trained model as the basis for a new application without careful consideration of the original nature of the training set. In this paper, we quantify the degree to which gender bias differs with the corpora used for training. We look especially at the impact of starting with a pre-trained model and fine-tuning with additional data. Specifically, we calculate a measure of direct gender bias on several pre-trained models including BERT's Wikipedia and Book corpus models as well as on several fine-tuned General Language Understanding Evaluation (GLUE) benchmarks. In addition, we evaluate the bias from several more extreme corpora including the Jigsaw identity toxic dataset that includes toxic speech biased against race, gender, religion, and disability and the RtGender dataset that includes speech specifically labelled by gender. Our results reveal that the direct gender bias of the Jigsaw toxic identity dataset is surprisingly close to that of the base pre-trained Google model, but the RtGender dataset has significantly higher direct gender bias than the base model. When the bias learned by an NLP system can vary significantly with the corpora used for training, it becomes important to consider and report these details, especially for use in critical decision-making applications.},
  booktitle = {Companion Proceedings of the Web Conference 2020},
  pages     = {752--759},
  numpages  = {8},
  keywords  = {BERT, gender bias, datasets, natural language processing},
  location  = {Taipei, Taiwan},
  series    = {WWW '20}
}

@book{Wetherell1992,
  title     = {Mapping the Language of Racism: Discourse and the Legitimation of Exploitation},
  author    = {Margaret Wetherell and Jonathan Potter},
  address   = {London and New York},
  publisher = {Harvester Wheatsheaf and Columbia University Press},
  year      = {1992},
  url       = {http://oro.open.ac.uk/24335/},
  abstract  = {The topics of race and racism are often treated narrowly in social psychological and other social scientific literature, usually being presented as subcategories of stereotyping or prejudice or attitudes. In this new book, Margaret Wetherell and Jonathan Potter extend their work on the use of discourse analysis in social science to cover racism and to include issues of social structure, power relations and ideology. Part 1 provides the theoretical framework within which representations of race can be studied, part 2 an empirical illustration from New Zealand of the arguments of part 1.}
}

@article{Risberg2009,
  author  = {Risberg, Gunilla and Johansson, Eva and Hamberg, Katarina},
  title   = {A theoretical model for analysing gender bias in medicine},
  journal = {International Journal for Equity in Health},
  volume  = {8},
  pages   = {28},
  year    = {2009},
  month   = sep,
  doi     = {10.1186/1475-9276-8-28}
}

@article{Isaac2009, title={Interventions That Affect Gender Bias in Hiring: A Systematic Review}, author={C. Isaac and Barbara Lee and M.
Carnes}, journal={Academic Medicine}, year={2009}, volume={84}, pages={1440--1446} }

@article{Budrikis2020,
  author  = {Budrikis, Zoe},
  title   = {Growing citation gender gap},
  journal = {Nature Reviews Physics},
  volume  = {2},
  year    = {2020},
  month   = jun,
  doi     = {10.1038/s42254-020-0207-3}
}

@article{Hoover2019,
  author  = {Hoover, Ann and Hack, Tay and Garcia, Amber and Goodfriend, Wind and Habashi, Meara},
  title   = {Powerless Men and Agentic Women: Gender Bias in Hiring Decisions},
  journal = {Sex Roles},
  volume  = {80},
  year    = {2019},
  month   = jun,
  doi     = {10.1007/s11199-018-0964-y}
}

@article{Sauermann2019,
  author  = {Sauermann, Jan and Mengel, Friederike and Z{\"o}litz, Ulf},
  title   = {Gender Bias in Teaching Evaluations},
  journal = {Journal of the European Economic Association},
  volume  = {17},
  pages   = {535--566},
  year    = {2019},
  month   = apr,
  doi     = {10.1093/jeea/jvx057}
}

@article{Hamberg2008,
  author   = {Katarina Hamberg},
  title    = {Gender Bias in Medicine},
  journal  = {Women's Health},
  volume   = {4},
  number   = {3},
  pages    = {237--243},
  year     = {2008},
  doi      = {10.2217/17455057.4.3.237},
  url      = {https://doi.org/10.2217/17455057.4.3.237},
  abstract = {Gender bias has implications in the treatment of both male and female patients and it is important to take into consideration in most fields of medical research, clinical practice and education. Gender blindness and stereotyped preconceptions about men and women are identified as key causes to gender bias. However, exaggeration of observed sex and gender differences can also lead to bias. This article will examine the phenomenon of gender bias in medicine, present useful concepts and models for the understanding of bias, and outline areas of interest for further research.}
}

@article{Gilbert1994, author = {Gilbert, Julie R. and Williams, Elaine S.
and Lundberg, George D.},
  title    = {Is There Gender Bias in {JAMA}'s Peer Review Process?},
  journal  = {JAMA},
  volume   = {272},
  number   = {2},
  pages    = {139--142},
  year     = {1994},
  month    = jul,
  abstract = {Objective.---To assess whether manuscripts received by JAMA in 1991 possessed differing peer review and manuscript processing characteristics, or had a variable chance of acceptance, associated with the gender of the participants in the peer review process. Design.---Retrospective cohort study of 1851 research articles. Setting.---JAMA editorial office. Participants.---Eight male and five female JAMA editors, 2452 male and 930 female reviewers, and 1698 male and 462 female authors. Main Outcome Measure.---Statistically significant gender bias. Results.---Female editors were assigned manuscripts from female corresponding authors more often than were male editors ($P<.001$). Female editors used more reviewers per manuscript if sent for other review. Male reviewers assisted male editors more often than female editors, and male reviewers took longer to return manuscripts than did their female counterparts (median, 25 vs 22 days). Content reviewer recommendations were independent of corresponding author and review gender, while male statistical reviewers recommended the highest and lowest categories more frequently than did female statistical reviewers ($P<.001$). Manuscripts handled by female editors were rejected summarily at higher rates ($P<.001$). Articles submitted to JAMA in 1991 were not accepted at significantly different rates based on the gender of the corresponding author or the assigned editor ($P>.4$). Conclusions.---Gender differences exist in editor and reviewer characteristics at JAMA with no apparent effect on the final outcome of the peer review process or acceptance for publication. (JAMA. 1994;272:139--142)},
  issn     = {0098-7484},
  doi      = {10.1001/jama.1994.03520020065018},
  url      = {https://doi.org/10.1001/jama.1994.03520020065018},
  eprint   = {https://jamanetwork.com/journals/jama/articlepdf/376219/jama\_272\_2\_018.pdf}
}

@article{Schamder2002,
  author   = {Toni Schmader},
  title    = {Gender Identification Moderates Stereotype Threat Effects on Women's Math Performance},
  journal  = {Journal of Experimental Social Psychology},
  volume   = {38},
  number   = {2},
  pages    = {194--201},
  year     = {2002},
  issn     = {0022-1031},
  doi      = {10.1006/jesp.2001.1500},
  url      = {https://www.sciencedirect.com/science/article/pii/S0022103101915008},
  abstract = {This research applies a social identity perspective to situations of stereotype threat. It was hypothesized that individuals would be more susceptible to the performance-inhibiting effects of stereotype threat to the extent that they are highly identified with the group to which a negative stereotype applies. A quasi-experimental study with male and female college students revealed that individual differences in gender identification (i.e., importance placed on gender identity) moderated the effects of gender identity relevance on women's (but not men's) math performance. When their gender identity was linked to their performance on a math test, women with higher levels of gender identification performed worse than men, but women with lower levels of gender identification performed equally to men. When gender identity was not linked to test performance, women performed equally to men regardless of the importance they placed on gender identity.}
}

@article{Greenwald1998, title={Measuring individual differences in implicit cognition: the implicit association test}, author={A. Greenwald and D. McGhee and J. L.
Schwartz}, journal={Journal of Personality and Social Psychology}, year={1998}, volume={74}, number={6}, pages={1464--1480} }

@article{Hansen2019,
  author  = {Hansen, Matt and Schoonover, Amanda and Skarica, Barbara and Harrod, Tabria and Bahr, Nathan and Guise, Jeanne-Marie},
  title   = {Implicit gender bias among {US} resident physicians},
  journal = {BMC Medical Education},
  volume  = {19},
  pages   = {396},
  year    = {2019},
  month   = oct,
  doi     = {10.1186/s12909-019-1818-1}
}

% Bias in word embeddings.

@article{Garg2018,
  author    = {Garg, Nikhil and Schiebinger, Londa and Jurafsky, Dan and Zou, James},
  title     = {Word embeddings quantify 100 years of gender and ethnic stereotypes},
  volume    = {115},
  number    = {16},
  pages     = {E3635--E3644},
  year      = {2018},
  doi       = {10.1073/pnas.1720347115},
  publisher = {National Academy of Sciences},
  abstract  = {Word embeddings are a popular machine-learning method that represents each English word by a vector, such that the geometry between these vectors captures semantic relations between the corresponding words. We demonstrate that word embeddings can be used as a powerful tool to quantify historical trends and social change. As specific applications, we develop metrics based on word embeddings to characterize how gender stereotypes and attitudes toward ethnic minorities in the United States evolved during the 20th and 21st centuries starting from 1910. Our framework opens up a fruitful intersection between machine learning and quantitative social science. Word embeddings are a powerful machine-learning framework that represents each English word by a vector. The geometric relationship between these vectors captures meaningful semantic relationships between the corresponding words. In this paper, we develop a framework to demonstrate how the temporal dynamics of the embedding helps to quantify changes in stereotypes and attitudes toward women and ethnic minorities in the 20th and 21st centuries in the United States. We integrate word embeddings trained on 100 y of text data with the US Census to show that changes in the embedding track closely with demographic and occupation shifts over time. The embedding captures societal shifts{\textemdash}e.g., the women{\textquoteright}s movement in the 1960s and Asian immigration into the United States{\textemdash}and also illuminates how specific adjectives and occupations became more closely associated with certain populations over time. Our framework for temporal analysis of word embedding opens up a fruitful intersection between machine learning and quantitative social science.},
  issn      = {0027-8424},
  url       = {https://www.pnas.org/content/115/16/E3635},
  eprint    = {https://www.pnas.org/content/115/16/E3635.full.pdf},
  journal   = {Proceedings of the National Academy of Sciences}
}

@misc{Sutton2018,
  author = {Sutton, Adam and Lansdall-Welfare, Thomas and Cristianini, Nello},
  title  = {Biased Embeddings from Wild Data: Measuring, Understanding and Removing},
  year   = {2018},
  month  = jun
}

@misc{Zhao2017,
  author        = {Jieyu Zhao and Tianlu Wang and Mark Yatskar and Vicente Ordonez and Kai-Wei Chang},
  title         = {Men Also Like Shopping: Reducing Gender Bias Amplification using Corpus-level Constraints},
  year          = {2017},
  eprint        = {1707.09457},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI}
}

@misc{Zhao2019,
  author        = {Jieyu Zhao and Tianlu Wang and Mark Yatskar and Ryan Cotterell and Vicente Ordonez and Kai-Wei Chang},
  title         = {Gender Bias in Contextualized Word Embeddings},
  year          = {2019},
  eprint        = {1904.03310},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL}
}

@article{Swinger2018, author = {Nathaniel Swinger and Maria De{-}Arteaga and Heffernan, IV, Neil Thomas and Mark D. M.
Leiserson and Adam Tauman Kalai}, title = {What are the biases in my word embedding?}, journal = {CoRR}, volume = {abs/1812.08769}, year = {2018}, url = {http://arxiv.org/abs/1812.08769}, archivePrefix = {arXiv}, eprint = {1812.08769}, timestamp = {Wed, 02 Jan 2019 14:40:18 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-1812-08769.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} }

% The Red Pill and the manosphere.

@article{Mountford2018,
  author         = {Mountford, Joseph B.},
  title          = {Topic Modeling {The Red Pill}},
  journal        = {Social Sciences},
  volume         = {7},
  year           = {2018},
  number         = {3},
  article-number = {42},
  url            = {https://www.mdpi.com/2076-0760/7/3/42},
  issn           = {2076-0760},
  abstract       = {The Men's Rights Activism (MRA) movement and its sub-movement The Red Pill (TRP), has flourished online, offering support and advice to men who feel their masculinity is being challenged by societal shifts. Whilst some insightful studies have been carried out, the small samples analysed by researchers limits the scope of studies, which is small compared to the large amounts of data that TRP produces. By extracting a significant quantity of content from a prominent MRA website, ReturnOfKings.com (RoK), whose creator is one of the most prominent figures in the manosphere and who has been featured in multiple studies. Research already completed can be expanded upon with topic modelling and neural networked machine learning, computational analysis that is proposed to augment methodologies of open coding by automatically and unbiasedly analysing conceptual clusters. The successes and limitations of this computational methodology shed light on its further uses in sociological research and has answered the question: What can topic modeling demonstrate about the men's rights activism movement's prescriptive masculinity? This methodology not only proved that it could replicate the results of a previous study, but also delivered insights into an increasingly political focus within TRP, and deeper perspectives into the concepts identified within the movement.},
  doi            = {10.3390/socsci7030042}
}

@article{Valkenburgh2021,
  author   = {Van Valkenburgh, Shawn P.},
  title    = {Digesting the {Red Pill}: Masculinity and Neoliberalism in the Manosphere},
  journal  = {Men and Masculinities},
  volume   = {24},
  number   = {1},
  pages    = {84--103},
  year     = {2021},
  doi      = {10.1177/1097184X18816118},
  url      = {https://doi.org/10.1177/1097184X18816118},
  abstract = {This article undertakes the first known qualitative study focusing on The Red Pill, an online forum wherein heterosexual men attempt to improve their seduction skills by discussing evolutionary psychology and economic theories. My content analysis of twenty-six documents (130,000 words) designated by the community as central to its purpose and ideology shows that The Red Pill is not just an expression of hegemonic masculinity but also explicitly integrates neoliberal and scientific discourses into its seduction strategies. I theorize that the resulting philosophy superficially resolves a contradiction between hegemonic masculinity's prescriptive emotional walls and an inherent desire for connection by constructing women as exchangeable commodities.}
}

@article{Dignam2019,
  author   = {Dignam, Pierce Alexander and Rohlinger, Deana A.},
  title    = {Misogynistic Men Online: How the {Red Pill} Helped Elect {Trump}},
  journal  = {Signs: Journal of Women in Culture and Society},
  volume   = {44},
  number   = {3},
  pages    = {589--612},
  year     = {2019},
  doi      = {10.1086/701155},
  url      = {https://doi.org/10.1086/701155},
  abstract = {Donald Trump's 2016 electoral victory was a shock for feminist scholars, yet it was no surprise to his legion of supporters in alt-right digital spaces.
In this essay, we analyze one of the online forums that helped propel Trump to electoral victory. Drawing on social movement concepts and an analysis of 1,762 posts, we show how leaders of the forum the “Red Pill” were able to move a community of adherents from understanding men’s rights as a personal philosophy to political action. This transition was no small endeavor. The Red Pill forum was explicitly apolitical until the summer before the 2016 election. During the election, forum leaders linked the forum’s neoliberal, misogynistic collective identity of alpha masculinity to Trump’s public persona and framed his political ascendance as an opportunity to effectively push back against feminism and get a “real” man into the White House. We argue that while previous research shows the importance of alt-right virtual spaces in creating and maintaining racist collective identities, we know very little about how men conceptualize gender in ways that inform their personal and political action—and this is to our detriment. We conclude the essay by arguing that feminists need to understand how men cultivate extreme personal and political identities in online forums so that we can better understand how new technologies are used to move individuals from the armchair to the streets. } } @article{Wright2020, author = {Wright, Scott and Trott, Verity and Jones, Callum}, title = {‘The pussy ain’t worth it, bro’: assessing the discourse and structure of {MGTOW}}, journal = {Information, Communication \& Society}, volume = {23}, number = {6}, pages = {908--925}, year = {2020}, publisher = {Routledge}, doi = {10.1080/1369118X.2020.1751867}, URL = {https://doi.org/10.1080/1369118X.2020.1751867}, eprint = {https://doi.org/10.1080/1369118X.2020.1751867}, abstract = {In the era of networked affordances, misogynistic men’s groups have been rapidly growing and have contributed to several physical fatal attacks along with the propagation of gendered online harassment and e-bile. 
It is thus important to study the organisational structures and communication dynamics of these groups to provide insight into why they have been successful in recruiting members and how they further spread and normalise misogynistic beliefs. One such misogynistic group is MGTOW (Men Going Their Own Way). This study seeks to understand the structure and content of discussion within the forum of the official MGTOW website. To do so, it undertakes a content analysis of comments (n = 1012) inspired by Freelon’s (2010, 2015) multi-norm approach to studying online communication, which draws on the liberal individual, communitarian and deliberative models. It also assesses the broader patterns of commenting (n = 628,745) participation amongst users (n = 33,863). The results suggest that debates on the forum combine elements of each communicative approach and that this reflects a contradiction in their underlying ideology of separation and individualism. In addition, it was found that topics of conversation primarily focused on two topics: women (the majority of which unfold in a misogynistic way) and defining MGTOW – both as a collective identity and a personal journey. 
} } @misc{Taylor2020, author = {Taylor, Erin}, title = {{Reddit’s} Female Dating Strategy Offers Women Advice — and a Strict Rulebook for How to Act}, howpublished = {The Verge}, year = 2020, url = {https://www.theverge.com/2020/2/14/21137852/reddit-female-dating-advice-strategy-women-rulebook-memes} } @article{Dynel2020, title = {Vigilante disparaging humour at {r/IncelTears}: Humour as critique of incel ideology}, journal = {Language \& Communication}, volume = {74}, pages = {1--14}, year = {2020}, issn = {0271-5309}, doi = {10.1016/j.langcom.2020.05.001}, url = {https://www.sciencedirect.com/science/article/pii/S0271530920300410}, author = {Dynel, Marta}, keywords = {Disparaging disaffiliative humour, Incel ideology, Misogyny, Multimodal critical discourse analysis, Satire and parody, Trolling}, abstract = {This paper gives a comprehensive account of a humorous practice on the IncelTears subreddit, whose aim is to poke fun at, and give a social commentary on, the notorious online community of incels (hateful involuntary celibate men). Based on a representative corpus, the predominant categories of user-generated multimodal items are teased out relative to their form and stance. The central characteristics and socio-pragmatic aims of these humorous practices are discussed, together with the ideological meanings they communicate. Apart from disparaging and critiquing what they consider to be indicative of the pernicious incel ideology, the subreddit community members derive pleasure from the humorous items and forge solidarity links. Overall, this study offers valid conclusions about the (dis)affiliative and informative functions of creative humour on social media represented by the subreddit which humorously addresses a socially relevant, serious problem.} } @article{Massanari2017, author = {Massanari, Adrienne}, year = {2017}, month = dec, pages = {1--19}, title = {``Come for the period comics. 
Stay for the cultural awareness'': reclaiming the troll identity through feminist humor on {Reddit’s} {/r/TrollXChromosomes}}, volume = {19}, journal = {Feminist Media Studies}, doi = {10.1080/14680777.2017.1414863} } @misc{Myers2020, author = {Myers, Quinn}, title = {What's Better Than This? Guys Being (Good) Dudes on {Reddit's} {TrollYChromosome}}, howpublished = {MEL Magazine}, year = {2020}, url = {https://melmagazine.com/en-us/story/trollychromosome-reddit-toxic-masculinity} } @inproceedings{Zampieri2019offenseval, title={{SemEval-2019} Task 6: Identifying and Categorizing Offensive Language in Social Media ({OffensEval})}, author={Zampieri, Marcos and Malmasi, Shervin and Nakov, Preslav and Rosenthal, Sara and Farra, Noura and Kumar, Ritesh}, booktitle={Proceedings of the 13th International Workshop on Semantic Evaluation}, pages={75--86}, year={2019} } @misc{Samoshyn2020, author = {Samoshyn, Andriy}, title = {Hate Speech and Offensive Language Dataset}, howpublished = {Kaggle}, url = {https://www.kaggle.com/mrmorj/hate-speech-and-offensive-language-dataset/metadata}, note = {Last Update: 2020-06-17}, year = {2020} } @inproceedings{Wulczyn2017, author = {Wulczyn, Ellery and Thain, Nithum and Dixon, Lucas}, title = {Ex Machina: Personal Attacks Seen at Scale}, year = {2017}, isbn = {9781450349130}, publisher = {International World Wide Web Conferences Steering Committee}, address = {Republic and Canton of Geneva, CHE}, url = {https://doi.org/10.1145/3038912.3052591}, doi = {10.1145/3038912.3052591}, abstract = {The damage personal attacks cause to online discourse motivates many platforms to try to curb the phenomenon. However, understanding the prevalence and impact of personal attacks in online platforms at scale remains surprisingly difficult. The contribution of this paper is to develop and illustrate a method that combines crowdsourcing and machine learning to analyze personal attacks at scale. We show an evaluation method for a classifier in terms of the aggregated number of crowd-workers it can approximate. 
We apply our methodology to English Wikipedia, generating a corpus of over 100k high quality human-labeled comments and 63M machine-labeled ones from a classifier that is as good as the aggregate of 3 crowd-workers, as measured by the area under the ROC curve and Spearman correlation. Using this corpus of machine-labeled scores, our methodology allows us to explore some of the open questions about the nature of online personal attacks. This reveals that the majority of personal attacks on Wikipedia are not the result of a few malicious users, nor primarily the consequence of allowing anonymous contributions from unregistered users.}, booktitle = {Proceedings of the 26th International Conference on World Wide Web}, pages = {1391--1399}, numpages = {9}, keywords = {online harassment, online discussions, wikipedia}, location = {Perth, Australia}, series = {WWW '17} } @article{Newman2004, author = {Newman, Mark}, year = {2004}, month = dec, title = {Power Laws, {Pareto} Distributions and {Zipf's} Law}, volume = {46}, journal = {Contemporary Physics}, doi = {10.1080/00107510500052444} } @inproceedings{Hine2017, author = {Hine, Gabriel and Onaolapo, Jeremiah and De Cristofaro, Emiliano and Kourtellis, Nicolas and Leontiadis, Ilias and Samaras, Riginos and Stringhini, Gianluca and Blackburn, Jeremy}, year = {2017}, month = may, pages = {92--101}, title = {{Kek}, Cucks, and {God Emperor Trump}: A Measurement Study of {4chan's} {Politically Incorrect} Forum and its Effects on the Web}, booktitle = {Proceedings of the International AAAI Conference on Web and Social Media} } @article{Balayn2021, author = {Balayn, Agathe and Yang, Jie and Szlavik, Zoltan and Bozzon, Alessandro}, title = {Automatic Identification of Harmful, Aggressive, Abusive, and Offensive Language on the Web: A Survey of Technical Biases Informed by Psychology Literature}, year = {2021}, issue_date = {September 2021}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {4}, number = {3}, issn = {2469-7818}, url = 
{https://doi.org/10.1145/3479158}, doi = {10.1145/3479158}, abstract = {The automatic detection of conflictual languages (harmful, aggressive, abusive, and offensive languages) is essential to provide a healthy conversation environment on the Web. To design and develop detection systems that are capable of achieving satisfactory performance, a thorough understanding of the nature and properties of the targeted type of conflictual language is of great importance. The scientific communities investigating human psychology and social behavior have studied these languages in details, but their insights have only partially reached the computer science community. In this survey, we aim both at systematically characterizing the conceptual properties of online conflictual languages, and at investigating the extent to which they are reflected in state-of-the-art automatic detection systems. Through an analysis of psychology literature, we provide a reconciled taxonomy that denotes the ensemble of conflictual languages typically studied in computer science. We then characterize the conceptual mismatches that can be observed in the main semantic and contextual properties of these languages and their treatment in computer science works; and systematically uncover resulting technical biases in the design of machine learning classification models and the dataset created for their training. Finally, we discuss diverse research opportunities for the computer science community and reflect on broader technical and structural issues.}, 
journal = {ACM Transactions on Social Computing}, month = oct, articleno = {11}, numpages = {56}, keywords = {abusive language, toxic language, cyberbullying, harmful language, discrimination, offensive language, harassment, Bias} } @inproceedings{Argano2019, author = {Arango, Aym{\'e} and P{\'e}rez, Jorge and Poblete, Barbara}, title = {Hate Speech Detection is Not as Easy as You May Think: A Closer Look at Model Validation}, year = {2019}, isbn = {9781450361729}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3331184.3331262}, doi = {10.1145/3331184.3331262}, abstract = {Hate speech is an important problem that is seriously affecting the dynamics and usefulness of online social communities. Large scale social platforms are currently investing important resources into automatically detecting and classifying hateful content, without much success. On the other hand, the results reported by state-of-the-art systems indicate that supervised approaches achieve almost perfect performance but only within specific datasets. In this work, we analyze this apparent contradiction between existing literature and actual applications. We study closely the experimental methodology used in prior work and their generalizability to other datasets. Our findings evidence methodological issues, as well as an important dataset bias. As a consequence, performance claims of the current state-of-the-art have become significantly overestimated. The problems that we have found are mostly related to data overfitting and sampling issues. 
We discuss the implications for current research and re-conduct experiments to give a more accurate picture of the current state-of-the art methods.}, booktitle = {Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval}, pages = {45--54}, numpages = {10}, keywords = {social media, deep learning, hate speech classification, experimental evaluation}, location = {Paris, France}, series = {SIGIR'19} } @article{Fast2016, author = {Fast, Ethan and Vachovsky, Tina and Bernstein, Michael S.}, title = {Shirtless and Dangerous: Quantifying Linguistic Signals of Gender Bias in an Online Fiction Writing Community}, journal = {Proceedings of the International AAAI Conference on Web and Social Media}, year = {2016}, url = {http://arxiv.org/abs/1603.08832}, eprinttype = {arXiv}, eprint = {1603.08832}, timestamp = {Mon, 13 Aug 2018 16:47:22 +0200}, biburl = {https://dblp.org/rec/journals/corr/FastVB16.bib}, bibsource = {dblp computer science bibliography, https://dblp.org}, pages = {112--120} } @article{Ging2019, author = {Ging, Debbie}, title = {Alphas, Betas, and Incels: Theorizing the Masculinities of the Manosphere}, journal = {Men and Masculinities}, volume = {22}, number = {4}, pages = {638--657}, year = {2019}, doi = {10.1177/1097184X17706401}, URL = {https://doi.org/10.1177/1097184X17706401}, eprint = {https://doi.org/10.1177/1097184X17706401}, abstract = { Since the emergence of Web 2.0 and social media, a particularly toxic brand of antifeminism has become evident across a range of online networks and platforms. Despite multiple internal conflicts and contradictions, these diverse assemblages are generally united in their adherence to Red Pill “philosophy,” which purports to liberate men from a life of feminist delusion. This loose confederacy of interest groups, broadly known as the manosphere, has become the dominant arena for the communication of men’s rights in Western culture. 
This article identifies the key categories and features of the manosphere and subsequently seeks to theorize the masculinities that characterize this discursive space. The analysis reveals that, while there are some continuities with older variants of antifeminism, many of these new toxic assemblages appear to complicate the orthodox alignment of power and dominance with hegemonic masculinity by operationalizing tropes of victimhood, “beta masculinity,” and involuntary celibacy (incels). These new hybrid masculinities provoke important questions about the different functioning of male hegemony off- and online and indicate that the technological affordances of social media are especially well suited to the amplification of new articulations of aggrieved manhood. } } @inproceedings{Rychwalska2018, author = {Rychwalska, Agnieszka and Roszczyńska-Kurasińska, Magdalena}, year = {2018}, month = jan, title = {Polarization on Social Media: When Group Dynamics Leads to Societal Divides}, booktitle = {Proceedings of the 51st Hawaii International Conference on System Sciences}, doi = {10.24251/HICSS.2018.263} } @article {Bail2018, author = {Bail, Christopher A. and Argyle, Lisa P. and Brown, Taylor W. and Bumpus, John P. and Chen, Haohan and Hunzaker, M. B. Fallin and Lee, Jaemin and Mann, Marcus and Merhout, Friedolin and Volfovsky, Alexander}, title = {Exposure to opposing views on social media can increase political polarization}, volume = {115}, number = {37}, pages = {9216--9221}, year = {2018}, doi = {10.1073/pnas.1804840115}, publisher = {National Academy of Sciences}, abstract = {Social media sites are often blamed for exacerbating political polarization by creating {\textquotedblleft}echo chambers{\textquotedblright} that prevent people from being exposed to information that contradicts their preexisting beliefs. We conducted a field experiment that offered a large group of Democrats and Republicans financial compensation to follow bots that retweeted messages by elected officials and opinion leaders with opposing political views. 
Republican participants expressed substantially more conservative views after following a liberal Twitter bot, whereas Democrats{\textquoteright} attitudes became slightly more liberal after following a conservative Twitter bot{\textemdash}although this effect was not statistically significant. Despite several limitations, this study has important implications for the emerging field of computational social science and ongoing efforts to reduce political polarization online.There is mounting concern that social media sites contribute to political polarization by creating {\textquotedblleft}echo chambers{\textquotedblright} that insulate people from opposing views about current events. We surveyed a large sample of Democrats and Republicans who visit Twitter at least three times each week about a range of social policy issues. One week later, we randomly assigned respondents to a treatment condition in which they were offered financial incentives to follow a Twitter bot for 1 month that exposed them to messages from those with opposing political ideologies (e.g., elected officials, opinion leaders, media organizations, and nonprofit groups). Respondents were resurveyed at the end of the month to measure the effect of this treatment, and at regular intervals throughout the study period to monitor treatment compliance. We find that Republicans who followed a liberal Twitter bot became substantially more conservative posttreatment. Democrats exhibited slight increases in liberal attitudes after following a conservative Twitter bot, although these effects are not statistically significant. 
Notwithstanding important limitations of our study, these findings have significant implications for the interdisciplinary literature on political polarization and the emerging field of computational social science.}, issn = {0027-8424}, URL = {https://www.pnas.org/content/115/37/9216}, eprint = {https://www.pnas.org/content/115/37/9216.full.pdf}, journal = {Proceedings of the National Academy of Sciences} } @inproceedings{Kwok2013, author = {Kwok, Irene and Wang, Yuzhou}, title = {Locate the Hate: Detecting Tweets against Blacks}, year = {2013}, publisher = {AAAI Press}, abstract = {Although the social medium Twitter grants users freedom of speech, its instantaneous nature and retweeting features also amplify hate speech. Because Twitter has a sizeable black constituency, racist tweets against blacks are especially detrimental in the Twitter community, though this effect may not be obvious against a backdrop of half a billion tweets a day. We apply a supervised machine learning approach, employing inexpensively acquired labeled data from diverse Twitter accounts to learn a binary classifier for the labels "racist" and "nonracist". The classifier has a 76\% average accuracy on individual tweets, suggesting that with further improvements, our work can contribute data on the sources of anti-black hate speech.}, booktitle = {Proceedings of the Twenty-Seventh AAAI Conference on Artificial Intelligence}, pages = {1621--1622}, numpages = {2}, location = {Bellevue, Washington}, series = {AAAI'13}, doi = {10.5555/2891460.2891697} } @inproceedings{Aroyehun2018, title = "Aggression Detection in Social Media: Using Deep Neural Networks, Data Augmentation, and Pseudo Labeling", author = "Aroyehun, Segun Taofeek and Gelbukh, Alexander", booktitle = "Proceedings of the First Workshop on Trolling, Aggression and Cyberbullying ({TRAC}-2018)", month = aug, year = "2018", address = "Santa Fe, New Mexico, USA", publisher = "Association for Computational Linguistics", url = 
"https://aclanthology.org/W18-4411", pages = "90--97", abstract = "With the advent of the read-write web which facilitates social interactions in online spaces, the rise of anti-social behaviour in online spaces has attracted the attention of researchers. In this paper, we address the challenge of automatically identifying aggression in social media posts. Our team, saroyehun, participated in the English track of the Aggression Detection in Social Media Shared Task. On this task, we investigate the efficacy of deep neural network models of varying complexity. Our results reveal that deep neural network models require more data points to do better than an NBSVM linear baseline based on character n-grams. Our improved deep neural network models were trained on augmented data and pseudo labeled examples. Our LSTM classifier receives a weighted macro-F1 score of 0.6425 to rank first overall on the Facebook subtask of the shared task. On the social media sub-task, our CNN-LSTM model records a weighted macro-F1 score of 0.5920 to place third overall.", } @inproceedings{Xu2012, author = {Xu, Jun-Ming and Jun, Kwang-Sung and Zhu, Xiaojin and Bellmore, Amy}, title = {Learning from Bullying Traces in Social Media}, year = {2012}, isbn = {9781937284206}, publisher = {Association for Computational Linguistics}, address = {USA}, abstract = {We introduce the social study of bullying to the NLP community. Bullying, in both physical and cyber worlds (the latter known as cyberbullying), has been recognized as a serious national health issue among adolescents. However, previous social studies of bullying are handicapped by data scarcity, while the few computational studies narrowly restrict themselves to cyberbullying which accounts for only a small fraction of all bullying episodes. 
Our main contribution is to present evidence that social media, with appropriate natural language processing techniques, can be a valuable and abundant data source for the study of bullying in both worlds. We identify several key problems in using such data sources and formulate them as NLP tasks, including text classification, role labeling, sentiment analysis, and topic modeling. Since this is an introductory paper, we present baseline results on these tasks using off-the-shelf NLP solutions, and encourage the NLP community to contribute better models in the future.}, booktitle = {Proceedings of the 2012 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}, pages = {656--666}, numpages = {11}, location = {Montreal, Canada}, series = {NAACL HLT '12}, doi = {10.5555/2382029.2382139} } @article{Burnap2015, author = {Burnap, Pete and Williams, Matthew L.}, title = {Cyber Hate Speech on {Twitter}: An Application of Machine Classification and Statistical Modeling for Policy and Decision Making}, journal = {Policy \& Internet}, volume = {7}, number = {2}, pages = {223--242}, keywords = {Twitter, hate speech, Internet, policy, machine classification, statistical modeling, cyber hate, ensemble classifier}, doi = {10.1002/poi3.85}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/poi3.85}, eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/poi3.85}, abstract = {The use of “Big Data” in policy and decision making is a current topic of debate. The 2013 murder of Drummer Lee Rigby in Woolwich, London, UK led to an extensive public reaction on social media, providing the opportunity to study the spread of online hate speech (cyber hate) on Twitter. 
Human annotated Twitter data was collected in the immediate aftermath of Rigby's murder to train and test a supervised machine learning text classifier that distinguishes between hateful and/or antagonistic responses with a focus on race, ethnicity, or religion; and more general responses. Classification features were derived from the content of each tweet, including grammatical dependencies between words to recognize “othering” phrases, incitement to respond with antagonistic action, and claims of well-founded or justified discrimination against social groups. The results of the classifier were optimal using a combination of probabilistic, rule-based, and spatial-based classifiers with a voted ensemble meta-classifier. We demonstrate how the results of the classifier can be robustly utilized in a statistical model used to forecast the likely spread of cyber hate in a sample of Twitter data. The applications to policy and decision making are discussed.}, year = {2015} } @inproceedings{Hande2020, title = "{K}an{CMD}: {K}annada {C}ode{M}ixed Dataset for Sentiment Analysis and Offensive Language Detection", author = "Hande, Adeep and Priyadharshini, Ruba and Chakravarthi, Bharathi Raja", booktitle = "Proceedings of the Third Workshop on Computational Modeling of People's Opinions, Personality, and Emotion's in Social Media", month = dec, year = "2020", address = "Barcelona, Spain (Online)", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2020.peoples-1.6", pages = "54--63", abstract = "We introduce Kannada CodeMixed Dataset (KanCMD), a multi-task learning dataset for sentiment analysis and offensive language identification. The KanCMD dataset highlights two real-world issues from the social media text. First, it contains actual comments in code mixed text posted by users on YouTube social media, rather than in monolingual text from the textbook. 
Second, it has been annotated for two tasks, namely sentiment analysis and offensive language detection for under-resourced Kannada language. Hence, KanCMD is meant to stimulate research in under-resourced Kannada language on real-world code-mixed social media text and multi-task learning. KanCMD was obtained by crawling the YouTube, and a minimum of three annotators annotates each comment. We release KanCMD 7,671 comments for multitask learning research purpose.", } @inproceedings{Subies2021, title={{EXIST2021}: Detecting Sexism with Transformers and Translation-Augmented Data}, author={Garc{\'\i}a Subies, Guillem}, booktitle={IberLEF@SEPLN}, year={2021} } @inproceedings{Butt2021, title={Sexism Identification using {BERT} and Data Augmentation - {EXIST2021}}, author={Butt, Sabur and Ashraf, Noman and Sidorov, Grigori and Gelbukh, Alexander F.}, booktitle={IberLEF@SEPLN}, publisher={International Conference of the Spanish Society for Natural Language Processing SEPLN 2021}, year={2021}, address = {Spain}, url = {http://ceur-ws.org/Vol-2943/exist_paper4.pdf}, numpages={10} } @inproceedings{Schtz2021, title={Automatic Sexism Detection with Multilingual Transformer Models {AIT FHSTP@EXIST2021}}, author={Sch{\"u}tz, Mina and Boeck, Jaqueline and Liakhovets, Daria and Slijepcevic, Djordje and Kirchknopf, Armin and Hecht, Manuel and Bogensperger, Johannes and Schlarb, Sven and Schindler, Alexander and Zeppelzauer, Matthias}, booktitle={IberLEF@SEPLN}, year={2021} } @InProceedings{Koh2021, title = {{WILDS}: A Benchmark of in-the-Wild Distribution Shifts}, author = {Koh, Pang Wei and Sagawa, Shiori and Marklund, Henrik and Xie, Sang Michael and Zhang, Marvin and Balsubramani, Akshay and Hu, Weihua and Yasunaga, Michihiro and Phillips, Richard Lanas and Gao, Irena and Lee, Tony and David, Etienne and Stavness, Ian and Guo, Wei and Earnshaw, Berton and Haque, Imran and Beery, Sara M and Leskovec, Jure and Kundaje, Anshul and Pierson, Emma and Levine, Sergey and Finn, Chelsea and Liang, Percy}, 
booktitle = {Proceedings of the 38th International Conference on Machine Learning}, pages = {5637--5664}, year = {2021}, editor = {Meila, Marina and Zhang, Tong}, volume = {139}, series = {Proceedings of Machine Learning Research}, month = jul, publisher = {PMLR}, pdf = {http://proceedings.mlr.press/v139/koh21a/koh21a.pdf}, url = {https://proceedings.mlr.press/v139/koh21a.html}, abstract = {Distribution shifts—where the training distribution differs from the test distribution—can substantially degrade the accuracy of machine learning (ML) systems deployed in the wild. Despite their ubiquity in the real-world deployments, these distribution shifts are under-represented in the datasets widely used in the ML community today. To address this gap, we present WILDS, a curated benchmark of 10 datasets reflecting a diverse range of distribution shifts that naturally arise in real-world applications, such as shifts across hospitals for tumor identification; across camera traps for wildlife monitoring; and across time and location in satellite imaging and poverty mapping. On each dataset, we show that standard training yields substantially lower out-of-distribution than in-distribution performance. This gap remains even with models trained by existing methods for tackling distribution shifts, underscoring the need for new methods for training models that are more robust to the types of distribution shifts that arise in practice. To facilitate method development, we provide an open-source package that automates dataset loading, contains default model architectures and hyperparameters, and standardizes evaluations. 
The full paper, code, and leaderboards are available at https://wilds.stanford.edu.} } @inproceedings{Farrel2019, author = {Farrell, Tracie and Fernandez, Miriam and Novotny, Jakub and Alani, Harith}, title = {Exploring Misogyny across the Manosphere in {Reddit}}, year = {2019}, isbn = {9781450362023}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3292522.3326045}, doi = {10.1145/3292522.3326045}, abstract = {The 'manosphere' has been a recent subject of feminist scholarship on the web. Serious accusations have been levied against it for its role in encouraging misogyny and violent threats towards women online, as well as for potentially radicalising lonely or disenfranchised men. Feminist scholars evidence this through a shift in the language and interests of some men's rights activists on the manosphere, away from traditional subjects of family law or mental health and towards more sexually explicit, violent, racist and homophobic language. In this paper, we study this phenomenon by investigating the flow of extreme language across seven online communities on Reddit, with openly misogynistic members (e.g., Men Going Their Own Way, Involuntarily Celibates), and investigate if and how misogynistic ideas spread within and across these communities. Grounded on feminist critiques of language, we created nine lexicons capturing specific misogynistic rhetoric (Physical Violence, Sexual Violence, Hostility, Patriarchy, Stoicism, Racism, Homophobia, Belittling, and Flipped Narrative) and used these lexicons to explore how language evolves within and across misogynistic groups. This analysis was conducted on 6 million posts, from 300K conversations created between 2011 and December 2018. 
Our results shows increasing patterns on misogynistic content and users as well as violent attitudes, corroborating existing theories of feminist studies that the amount of misogyny, hostility and violence is steadily increasing in the manosphere.}, booktitle = {Proceedings of the 10th ACM Conference on Web Science}, pages = {87--96}, numpages = {10} } @article{Ribeiro2020, author = {Manoel Horta Ribeiro and Jeremy Blackburn and Barry Bradlyn and Emiliano De Cristofaro and Gianluca Stringhini and Summer Long and Stephanie Greenberg and Savvas Zannettou}, title = {From Pick-Up Artists to Incels: {A} Data-Driven Sketch of the Manosphere}, journal = {CoRR}, volume = {abs/2001.07600}, year = {2020}, url = {https://arxiv.org/abs/2001.07600}, eprinttype = {arXiv}, eprint = {2001.07600}, timestamp = {Fri, 24 Jan 2020 15:00:57 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2001-07600.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{LaViolette2019, title={Using Platform Signals for Distinguishing Discourses: The Case of {Men's Rights} and {Men's Liberation} on {Reddit}}, author={LaViolette, Jack and Hogan, Bernie}, booktitle={Proceedings of the International AAAI Conference on Web and Social Media}, year={2019} } @inproceedings{Papakyriakopoulos2020, author = {Papakyriakopoulos, Orestis and Hegelich, Simon and Serrano, Juan Carlos Medina and Marco, Fabienne}, title = {Bias in Word Embeddings}, year = {2020}, isbn = {9781450369367}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3351095.3372843}, doi = {10.1145/3351095.3372843}, abstract = {Word embeddings are a widely used set of natural language processing techniques that map words to vectors of real numbers. These vectors are used to improve the quality of generative and predictive models. Recent studies demonstrate that word embeddings contain and amplify biases present in data, such as stereotypes and prejudice. 
In this study, we provide a complete overview of bias in word embeddings. We develop a new technique for bias detection for gendered languages and use it to compare bias in embeddings trained on Wikipedia and on political social media data. We investigate bias diffusion and prove that existing biases are transferred to further machine learning models. We test two techniques for bias mitigation and show that the generally proposed methodology for debiasing models at the embeddings level is insufficient. Finally, we employ biased word embeddings and illustrate that they can be used for the detection of similar biases in new data. Given that word embeddings are widely used by commercial companies, we discuss the challenges and required actions towards fair algorithmic implementations and applications.}, booktitle = {Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency}, pages = {446–457}, numpages = {12}, keywords = {mitigation, racism, sexism, detection, word embeddings, diffusion, fairness, bias, homophobia}, location = {Barcelona, Spain}, series = {FAT* '20} } @article{Mikolov2013, author = {Mikolov, Tomas and Chen, Kai and Corrado, G.s and Dean, Jeffrey}, year = {2013}, month = {01}, pages = {}, title = {Efficient Estimation of Word Representations in Vector Space}, volume = {2013}, journal = {Proceedings of Workshop at ICLR} } @article{icwsm21manosphere, title={The Evolution of the Manosphere across the Web}, volume={15}, number={1}, journal={Proceedings of the International AAAI Conference on Web and Social Media}, author={Horta Ribeiro, Manoel and Blackburn, Jeremy and Bradlyn, Barry and De Cristofaro, Emiliano and Stringhini, Gianluca and Long, Summer and Greenberg, Stephanie and Zannettou, Savvas}, year={2021}, month={May}, pages={196-207} } @article{pushshift, author = {Jason Baumgartner and Savvas Zannettou and Brian Keegan and Megan Squire and Jeremy Blackburn}, title = {The Pushshift Reddit Dataset}, journal = {ICWSM}, volume = 
{abs/2001.08435}, year = {2020}, url = {https://arxiv.org/abs/2001.08435}, eprinttype = {arXiv}, eprint = {2001.08435}, timestamp = {Fri, 24 Jan 2020 15:00:57 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2001-08435.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{guest2021expert, title = "An Expert Annotated Dataset for the Detection of Online Misogyny", author = "Guest, Ella and Vidgen, Bertie and Mittos, Alexandros and Sastry, Nishanth and Tyson, Gareth and Margetts, Helen", booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume", month = apr, year = "2021", address = "Online", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2021.eacl-main.114", doi = "10.18653/v1/2021.eacl-main.114", pages = "1336--1350", abstract = "Online misogyny is a pernicious social problem that risks making online platforms toxic and unwelcoming to women. We present a new hierarchical taxonomy for online misogyny, as well as an expert labelled dataset to enable automatic classification of misogynistic content. The dataset consists of 6567 labels for Reddit posts and comments. As previous research has found untrained crowdsourced annotators struggle with identifying misogyny, we hired and trained annotators and provided them with robust annotation guidelines. We report baseline classification performance on the binary classification task, achieving accuracy of 0.93 and F1 of 0.43. 
The codebook and datasets are made freely available for future researchers.", }

% The record names no journal, so it is typed as misc rather than article; the empty
% pages field was dropped. NOTE(review): add the venue or an arXiv id if known.
@misc{Grosz2020,
  author = {Grosz, Dylan and Conde-C{\'e}spedes, Patricia},
  title  = {Automatic Detection of Sexist Statements Commonly Used at the Workplace},
  year   = {2020},
  month  = jul,
}

@inproceedings{paul2016anonymine,
  author    = {Paul, Arindam and Agrawal, Ankit and Liao, Wei-keng and Choudhary, Alok},
  title     = {{AnonyMine}: Mining anonymous social media posts using psycho-lingual and crowd-sourced dictionaries},
  booktitle = {Proceedings of KDD},
  year      = {2016},
}

% The export carried volume 20 and pages 2020, i.e. the year split across two fields;
% both were dropped. NOTE(review): restore them if the workshop really numbers its papers.
@inproceedings{ireland2020profiling,
  author    = {Ireland, Molly E and Schler, Jonathan and Gecht, G Niederhoffer and Niederhoffer, Kate G},
  title     = {Profiling Depression in Neutral {Reddit} Posts},
  booktitle = {GOOD Workshop KDD},
  year      = {2020},
}

% NOTE(review): volume and pages are kept as exported; they may be export artefacts
% for a workshop paper, so verify them against the published version.
@inproceedings{garcia2018context,
  author    = {Garcia, Rolando and Sreekanti, Vikram and Yadwadkar, Neeraja and Crankshaw, Daniel and Gonzalez, Joseph E and Hellerstein, Joseph M},
  title     = {Context: The missing piece in the machine learning lifecycle},
  booktitle = {KDD CMI Workshop},
  volume    = {114},
  pages     = {368},
  year      = {2018},
}

@misc{he2023prompt,
  author        = {He, Xinlei and Zannettou, Savvas and Shen, Yun and Zhang, Yang},
  title         = {You Only Prompt Once: On the Capabilities of Prompt Learning on Large Language Models to Tackle Toxic Content},
  year          = {2023},
  eprint        = {2308.05596},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
}

% NOTE(review): same paper as guest2021expert, which is defined earlier in this file
% under a different key; both keys are kept so existing citations keep resolving.
@inproceedings{guest-etal-2021-expert,
  author    = {Guest, Ella and Vidgen, Bertie and Mittos, Alexandros and Sastry, Nishanth and Tyson, Gareth and Margetts, Helen},
  title     = {An Expert Annotated Dataset for the Detection of Online Misogyny},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  pages     = {1336--1350},
  year      = {2021},
  month     = apr,
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  doi       = {10.18653/v1/2021.eacl-main.114},
  url       = {https://aclanthology.org/2021.eacl-main.114},
  abstract  = {Online misogyny is a pernicious social problem that risks making online platforms toxic and unwelcoming to women. We present a new hierarchical taxonomy for online misogyny, as well as an expert labelled dataset to enable automatic classification of misogynistic content. The dataset consists of 6567 labels for Reddit posts and comments. As previous research has found untrained crowdsourced annotators struggle with identifying misogyny, we hired and trained annotators and provided them with robust annotation guidelines. We report baseline classification performance on the binary classification task, achieving accuracy of 0.93 and F1 of 0.43. The codebook and datasets are made freely available for future researchers.},
}

@inproceedings{chernyshev-etal-2023-lct,
  author    = {Chernyshev, Konstantin and Garanina, Ekaterina and Bayram, Duygu and Zheng, Qiankun and Edman, Lukas},
  title     = {{LCT-1} at {SemEval-2023} Task 10: Pre-training and Multi-task Learning for Sexism Detection and Classification},
  booktitle = {Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)},
  pages     = {1573--1581},
  year      = {2023},
  month     = jul,
  publisher = {Association for Computational Linguistics},
  address   = {Toronto, Canada},
  doi       = {10.18653/v1/2023.semeval-1.217},
  url       = {https://aclanthology.org/2023.semeval-1.217},
  abstract  = {Misogyny and sexism are growing problems in social media. Advances have been made in online sexism detection but the systems are often uninterpretable. SemEval-2023 Task 10 on Explainable Detection of Online Sexism aims at increasing explainability of the sexism detection, and our team participated in all the proposed subtasks. Our system is based on further domain-adaptive pre-training. Building on the Transformer-based models with the domain adaptation, we compare fine-tuning with multi-task learning and show that each subtask requires a different system configuration. In our experiments, multi-task learning performs on par with standard fine-tuning for sexism detection and noticeably better for coarse-grained sexism classification, while fine-tuning is preferable for fine-grained classification.},
}

% Subreddit names are braced so sentence-casing styles leave them alone; publisher and
% city are split into separate fields.
@article{horta2021platform,
  author    = {Horta Ribeiro, Manoel and Jhaver, Shagun and Zannettou, Savvas and Blackburn, Jeremy and Stringhini, Gianluca and De Cristofaro, Emiliano and West, Robert},
  title     = {Do platform migrations compromise content moderation? {Evidence} from {r/The\_Donald} and {r/Incels}},
  journal   = {Proceedings of the ACM on Human-Computer Interaction},
  volume    = {5},
  number    = {CSCW2},
  pages     = {1--24},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% The quoted phrase uses TeX quote ligatures instead of straight double quotes.
@article{papadamou2021over,
  author    = {Papadamou, Kostantinos and Zannettou, Savvas and Blackburn, Jeremy and De Cristofaro, Emiliano and Stringhini, Gianluca and Sirivianos, Michael},
  title     = {``How over is it?'' {Understanding} the {Incel} Community on {YouTube}},
  journal   = {Proceedings of the ACM on Human-Computer Interaction},
  volume    = {5},
  number    = {CSCW2},
  pages     = {1--25},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

@article{chandrasekharan2018internet,
  author    = {Chandrasekharan, Eshwar and Samory, Mattia and Jhaver, Shagun and Charvat, Hunter and Bruckman, Amy and Lampe, Cliff and Eisenstein, Jacob and Gilbert, Eric},
  title     = {The {Internet's} hidden rules: An empirical study of {Reddit} norm violations at micro, meso, and macro scales},
  journal   = {Proceedings of the ACM on Human-Computer Interaction},
  volume    = {2},
  number    = {CSCW},
  pages     = {1--25},
  year      = {2018},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

@article{mariconti2019you, title={"You Know What to Do" Proactive Detection of YouTube Videos Targeted by Coordinated Hate Attacks}, author={Mariconti, Enrico and Suarez-Tangil, Guillermo and Blackburn, Jeremy and De Cristofaro, Emiliano and Kourtellis, Nicolas and Leontiadis, Ilias and Serrano, Jordi Luque and Stringhini, Gianluca}, journal={Proceedings of the ACM on Human-Computer Interaction}, volume={3}, 
number={CSCW}, pages={1--21}, year={2019}, publisher={ACM New York, NY, USA} }

% Journal name spelled the same way as in the other PACM HCI entries of this file;
% the hashtag is braced to keep its capitalisation.
@article{rho2018fostering,
  author    = {Rho, Eugenia Ha Rim and Mark, Gloria and Mazmanian, Melissa},
  title     = {Fostering civil discourse online: Linguistic behavior in comments of {\#MeToo} articles across political perspectives},
  journal   = {Proceedings of the ACM on Human-Computer Interaction},
  volume    = {2},
  number    = {CSCW},
  pages     = {1--28},
  year      = {2018},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

@article{freeman2020streaming,
  author    = {Freeman, Guo and Wohn, Donghee Yvette},
  title     = {Streaming your identity: Navigating the presentation of gender and sexuality through live streaming},
  journal   = {Computer Supported Cooperative Work (CSCW)},
  volume    = {29},
  pages     = {795--825},
  year      = {2020},
  publisher = {Springer},
}

@article{borsotti2022humor,
  author    = {Borsotti, Valeria and Bj{\o}rn, Pernille},
  title     = {Humor and stereotypes in computing: An equity-focused approach to institutional accountability},
  journal   = {Computer Supported Cooperative Work (CSCW)},
  volume    = {31},
  number    = {4},
  pages     = {771--803},
  year      = {2022},
  publisher = {Springer},
}

@article{messing2021observing,
  author    = {Messing, Karen and Lefran{\c{c}}ois, M{\'e}lanie and Saint-Charles, Johanne},
  title     = {Observing Inequality: Can Ergonomic Observations Help Interventions Transform the Role of Gender in Work Activity?},
  journal   = {Computer Supported Cooperative Work (CSCW)},
  volume    = {30},
  pages     = {215--249},
  year      = {2021},
  publisher = {Springer},
}

% NOTE(review): values kept exactly as exported. The author list looks mangled (given
% names appear in the surname position) and the booktitle embeds volume and issue text;
% re-export this record from the publisher page before relying on it.
@inproceedings{rode2018understanding,
  author       = {Rode, J and Kirstin, E and Jessica, H and Megan Kelly, H and Anna, W and Jennifer, M},
  title        = {Understanding Gender Equity in Author Order Assignment},
  booktitle    = {Proceedings of the ACM on Human-Computer Interaction-CSCW archive Volume 2 Issue CSCW, November 2018},
  volume       = {21},
  year         = {2018},
  organization = {ACM},
}

@article{paakki2021disruptive,
  author    = {Paakki, Henna and Veps{\"a}l{\"a}inen, Heidi and Salovaara, Antti},
  title     = {Disruptive online communication: How asymmetric trolling-like response strategies steer conversation off the track},
  journal   = {Computer Supported Cooperative Work (CSCW)},
  volume    = {30},
  number    = {3},
  pages     = {425--461},
  year      = {2021},
  publisher = {Springer},
}

@article{humphry2014officing,
  author    = {Humphry, Justine},
  title     = {Officing: Mediating time and the professional self in the support of nomadic work},
  journal   = {Computer Supported Cooperative Work (CSCW)},
  volume    = {23},
  pages     = {185--204},
  year      = {2014},
  publisher = {Springer},
}

@book{em:86,
  editor    = {Engelmore, Robert and Morgan, Anthony},
  title     = {Blackboard Systems},
  year      = 1986,
  publisher = {Addison-Wesley},
  address   = {Reading, Mass.},
}

% Accent on the fifth author corrected from grave to acute; journal name capitalised;
% publisher and city split into separate fields.
@article{aczel2020consensus,
  author    = {Aczel, Balazs and Szaszi, Barnabas and Sarafoglou, Alexandra and Kekecs, Zoltan and Kucharsk{\'y}, {\v{S}}imon and Benjamin, Daniel and Chambers, Christopher D and Fisher, Agneta and Gelman, Andrew and Gernsbacher, Morton A and others},
  title     = {A consensus-based transparency checklist},
  journal   = {Nature Human Behaviour},
  volume    = {4},
  number    = {1},
  pages     = {4--6},
  year      = {2020},
  publisher = {Nature Publishing Group},
  address   = {London, UK},
}

% Page ranges use the double hyphen; the title is no longer wrapped in a second pair of
% braces, so the bibliography style decides its casing.
@inproceedings{c:83,
  author    = {Clancey, William J.},
  title     = {Communication, Simulation, and Intelligent Agents: Implications of Personal Intelligent Machines for Medical Education},
  booktitle = {Proceedings of the Eighth International Joint Conference on Artificial Intelligence {(IJCAI-83)}},
  pages     = {556--560},
  year      = 1983,
  publisher = {{IJCAI Organization}},
  address   = {Menlo Park, Calif},
}

@inproceedings{c:84,
  author    = {Clancey, William J.},
  title     = {Classification Problem Solving},
  booktitle = {Proceedings of the Fourth National Conference on Artificial Intelligence},
  pages     = {45--54},
  year      = 1984,
  publisher = {AAAI Press},
  address   = {Menlo Park, Calif.},
}

@article{r:80, author = {Robinson, Arthur L.}, title = {New Ways to Make Microcircuits Smaller}, volume = {208}, number = {4447}, pages = {1019--1022}, year = {1980}, doi = {10.1126/science.208.4447.1019}, publisher = {American Association for the 
Advancement of Science}, issn = {0036-8075}, URL = {https://science.sciencemag.org/content/208/4447/1019}, eprint = {https://science.sciencemag.org/content/208/4447/1019.full.pdf}, journal = {Science}, }

% The title itself marks this record as a deliberate duplicate of r:80; only the page
% range hyphen and the whole-title bracing were changed.
@article{r:80x,
  author  = {Robinson, Arthur L.},
  title   = {New Ways to Make Microcircuits Smaller---Duplicate Entry},
  journal = {Science},
  volume  = 208,
  pages   = {1019--1026},
  year    = 1980,
}

% DOI stored without the resolver prefix, page range with a double hyphen. In the
% abstract, "automatte" was corrected to "automatic" and Unicode dashes became TeX dashes.
@article{hcr:83,
  author   = {Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn},
  title    = {Strategic explanations for a diagnostic consultation system},
  journal  = {International Journal of Man-Machine Studies},
  volume   = {20},
  number   = {1},
  pages    = {3--19},
  year     = {1984},
  issn     = {0020-7373},
  doi      = {10.1016/S0020-7373(84)80003-6},
  url      = {https://www.sciencedirect.com/science/article/pii/S0020737384800036},
  abstract = {This article examines the problem of automatic explanation of reasoning, especially as it relates to expert systems. By explanation we mean the ability of a program to discuss what it is doing in some understandable way. We first present a general framework in which to view explanation and review some of the research done in this area. We then focus on the explanation system for NEOMYCIN, a medical consultation program. A consultation program interactively helps a user to solve a problem. Our goal is to have NEOMYCIN explain its problem-solving strategies. An explanation of strategy describes the plan the program is using to reach a solution. Such an explanation is usually concrete, referring to aspects of the current problem situation. Abstract explanations articulate a general principle, which can be applied in different situations; such explanations are useful in teaching and in explaining by analogy. We describe the aspects of NEOMYCIN that make abstract strategic explanations possible---the representation of strategic knowledge explicitly and separately from domain knowledge---and demonstrate how this representation can be used to generate explanations.},
}

@article{hcrt:83,
  author  = {Hasling, Diane Warner and Clancey, William J. and Rennels, Glenn R. and Test, Thomas},
  title   = {Strategic Explanations in Consultation---Duplicate},
  journal = {The International Journal of Man-Machine Studies},
  volume  = 20,
  number  = 1,
  pages   = {3--19},
  year    = 1983,
}

@techreport{r:86,
  author      = {Rice, James},
  title       = {{Poligon}: A System for Parallel Problem Solving},
  type        = {Technical Report},
  number      = {KSL-86-19},
  institution = {Dept.\ of Computer Science, Stanford Univ.},
  year        = 1986,
}

@phdthesis{c:79,
  author  = {Clancey, William J.},
  title   = {Transfer of Rule-Based Expertise through a Tutorial Dialogue},
  type    = {{Ph.D.} diss.},
  school  = {Dept.\ of Computer Science, Stanford Univ.},
  address = {Stanford, Calif.},
  year    = 1979,
}

@unpublished{c:21,
  author = {Clancey, William J.},
  title  = {The Engineering of Qualitative Models},
  year   = 2021,
  note   = {Forthcoming},
}

@misc{c:22,
  author        = {Bouville, Mathieu},
  title         = {Crime and punishment in scientific research},
  year          = {2008},
  eprint        = {0803.4058},
  archivePrefix = {arXiv},
  primaryClass  = {physics.soc-ph},
}

% The sixth author was exported with the suffix in the surname slot; it is now written
% in the "Last, Jr, First" form so that the suffix is parsed as a suffix.
@article{gebru2021datasheets,
  author    = {Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e}, III, Hal and Crawford, Kate},
  title     = {Datasheets for datasets},
  journal   = {Communications of the ACM},
  volume    = {64},
  number    = {12},
  pages     = {86--92},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% The export had the issuing organisation and a permalink (split by a stray space)
% inside the journal field; they now live in howpublished and url, and the record is
% typed as misc because it names no journal.
@misc{ashurst2020guide,
  author       = {Ashurst, Carolyn and Anderljung, Markus and Prunkl, Carina and Leike, Jan and Gal, Yarin and Shevlane, Toby and Dafoe, Allan},
  title        = {A guide to writing the {NeurIPS} impact statement},
  howpublished = {Centre for the Governance of AI},
  url          = {https://perma.cc/B5R8-2B9V},
  year         = {2020},
}

@inproceedings{benotti2023understanding,
  author    = {Benotti, Luciana and Fort, Kar{\"e}n and Kan, Min-Yen and Tsvetkov, Yulia},
  title     = {Understanding Ethics in {NLP} Authoring and Reviewing},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts},
  pages     = {19--24},
  year      = {2023},
}

@misc{neurips,
  author       = {{NeurIPS}},
  title        = {{NeurIPS} 2021 Paper Checklist Guidelines},
  howpublished = {\url{https://neurips.cc/Conferences/2021/PaperInformation/PaperChecklist}},
  year         = 2021,
}

@misc{fair,
  author       = {{FORCE11}},
  title        = {The {FAIR} Data principles},
  howpublished = {\url{https://force11.org/info/the-fair-data-principles/}},
  year         = 2020,
}

@misc{c:23,
  author       = {{NASA}},
  title        = {Pluto: The 'Other' Red Planet},
  howpublished = {\url{https://www.nasa.gov/nh/pluto-the-other-red-planet}},
  year         = 2015,
  note         = {Accessed: 2018-12-06},
}

@inproceedings{chen2022personalized,
  author    = {Chen, Zhuo and Liu, Baoxi and Zhang, Peng and Lu, Tun and Gu, Hansu and Gu, Ning},
  title     = {A Personalized Cross-Platform Post Style Transfer Method Based on Transformer and Bi-Attention Mechanism},
  booktitle = {Proceedings of the Fifteenth ACM International Conference on Web Search and Data Mining},
  pages     = {85--93},
  year      = {2022},
}

@inproceedings{wallach2006topic,
  author    = {Wallach, Hanna M},
  title     = {Topic modeling: beyond bag-of-words},
  booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
  pages     = {977--984},
  year      = {2006},
}

@article{liu1999statistical,
  author    = {Liu, Yanhui and Gopikrishnan, Parameswaran and Stanley, H Eugene and others},
  title     = {Statistical properties of the volatility of price fluctuations},
  journal   = {Physical Review E},
  volume    = {60},
  number    = {2},
  pages     = {1390},
  year      = {1999},
  publisher = {APS},
}

% The trailing period was removed from the title because bibliography styles add their own.
@misc{rayson2004ucrel,
  author = {Rayson, Paul and Archer, Dawn and Piao, Scott and McEnery, Anthony M},
  title  = {The {UCREL} semantic analysis system},
  year   = {2004},
}

@article{seering2023moderates, 
title={Who Moderates on Twitch and What Do They Do? Quantifying Practices in Community Moderation on Twitch}, author={Seering, Joseph and Kairam, Sanjay R}, journal={Proceedings of the ACM on Human-Computer Interaction}, volume={7}, number={GROUP}, pages={1--18}, year={2023}, publisher={ACM New York, NY, USA} }

@article{li2020survey,
  author    = {Li, Jing and Sun, Aixin and Han, Jianglei and Li, Chenliang},
  title     = {A survey on deep learning for named entity recognition},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {34},
  number    = {1},
  pages     = {50--70},
  year      = {2020},
  publisher = {IEEE},
}

% The model name is an acronym, so it is upper-cased and braced to survive
% sentence-casing styles. The journal string is kept as exported so the rendered
% reference still shows the arXiv identifier.
@article{devlin2018bert,
  author  = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title   = {{BERT}: Pre-training of deep bidirectional transformers for language understanding},
  journal = {arXiv preprint arXiv:1810.04805},
  year    = {2018},
}

@article{sarzynska2021detecting,
  author    = {Sarzynska-Wawer, Justyna and Wawer, Aleksander and Pawlak, Aleksandra and Szymanowska, Julia and Stefaniak, Izabela and Jarkiewicz, Michal and Okruszek, Lukasz},
  title     = {Detecting formal thought disorder by deep contextualized word representations},
  journal   = {Psychiatry Research},
  volume    = {304},
  pages     = {114135},
  year      = {2021},
  publisher = {Elsevier},
}

@inproceedings{zhong2017wearing,
  author    = {Zhong, Changtao and Chang, Hau-wen and Karamshuk, Dmytro and Lee, Dongwon and Sastry, Nishanth},
  title     = {Wearing many (social) hats: How different are your different social network personae?},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {11},
  number    = {1},
  pages     = {397--406},
  year      = {2017},
}

@article{kim2021human, title={A Human-Centered Systematic Literature Review of Cyberbullying Detection Algorithms}, author={Kim, Seunghyun and Razi, Afsaneh and Stringhini, Gianluca and Wisniewski, Pamela J and De Choudhury, Munmun}, journal={Proceedings of the ACM on Human-Computer Interaction}, volume={5}, number={CSCW2}, pages={1--34}, year={2021}, publisher={ACM New 
York, NY, USA} }

% "SoK" is a mixed-case acronym, so it is braced to keep its capitalisation.
@inproceedings{thomas2021sok,
  author       = {Thomas, Kurt and Akhawe, Devdatta and Bailey, Michael and Boneh, Dan and Bursztein, Elie and Consolvo, Sunny and Dell, Nicola and Durumeric, Zakir and Kelley, Patrick Gage and Kumar, Deepak and others},
  title        = {{SoK}: Hate, harassment, and the changing landscape of online abuse},
  booktitle    = {2021 IEEE Symposium on Security and Privacy (SP)},
  pages        = {247--267},
  year         = {2021},
  organization = {IEEE},
}

@inproceedings{jaidka2018facebook,
  author    = {Jaidka, Kokil and Guntuku, Sharath and Ungar, Lyle},
  title     = {{Facebook} versus {Twitter}: Differences in self-disclosure and trait prediction},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {12},
  number    = {1},
  year      = {2018},
}

% Publisher and city were fused in one field by the export; they are split here.
@article{zhang2021studying,
  author    = {Zhang, Peng and Liu, Baoxi and Ding, Xianghua and Lu, Tun and Gu, Hansu and Gu, Ning},
  title     = {Studying and understanding characteristics of post-syncing practice and goal in social network sites},
  journal   = {ACM Transactions on the Web (TWEB)},
  volume    = {15},
  number    = {4},
  pages     = {1--26},
  year      = {2021},
  publisher = {ACM},
  address   = {New York, NY},
}

% Venue name capitalised as a proper name; styles do not recase booktitle.
@inproceedings{manikonda2016tweeting,
  author    = {Manikonda, Lydia and Meduri, Venkata Vamsikrishna and Kambhampati, Subbarao},
  title     = {Tweeting the mind and instagramming the heart: Exploring differentiated content sharing on social media},
  booktitle = {Tenth International AAAI Conference on Web and Social Media},
  year      = {2016},
}

@inproceedings{lin2013two,
  author       = {Lin, Han and Qiu, Lin},
  title        = {Two sites, two voices: Linguistic differences between {Facebook} status updates and tweets},
  booktitle    = {International Conference on Cross-Cultural Design},
  pages        = {432--440},
  year         = {2013},
  organization = {Springer},
}

@inproceedings{chen2014understanding, title={Understanding cross-site linking in online social networks}, author={Chen, Yang and Zhuang, Chenfan and Cao, Qiang and Hui, Pan}, booktitle={Proceedings of the 8th Workshop on Social Network Mining and Analysis}, pages={1--9}, 
year={2014} }

@inproceedings{radfar2020characterizing,
  author    = {Radfar, Bahar and Shivaram, Karthik and Culotta, Aron},
  title     = {Characterizing variation in toxic language by social context},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {14},
  pages     = {959--963},
  year      = {2020},
}

% Journal acronyms restored to upper case; publisher and city split.
@article{shu2017user,
  author    = {Shu, Kai and Wang, Suhang and Tang, Jiliang and Zafarani, Reza and Liu, Huan},
  title     = {User identity linkage across online social networks: A review},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {18},
  number    = {2},
  pages     = {5--17},
  year      = {2017},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

@article{anderson2015ask,
  author    = {Anderson, Katie Elson},
  title     = {Ask me anything: what is {Reddit}?},
  journal   = {Library Hi Tech News},
  year      = {2015},
  publisher = {Emerald Group Publishing Limited},
}

% The export repeated the publisher name and fused it with the city; the journal name
% is capitalised as a proper name.
@article{caers2013facebook,
  author    = {Caers, Ralf and De Feyter, Tim and De Couck, Marijke and Stough, Talia and Vigna, Claudia and Du Bois, Cind},
  title     = {{Facebook}: A literature review},
  journal   = {New Media \& Society},
  volume    = {15},
  number    = {6},
  pages     = {982--1002},
  year      = {2013},
  publisher = {SAGE Publications},
  address   = {London, England},
}

@book{murthy2018twitter,
  author    = {Murthy, Dhiraj},
  title     = {Twitter},
  year      = {2018},
  publisher = {Polity Press},
  address   = {Cambridge},
}

@inproceedings{danescu2013no,
  author    = {Danescu-Niculescu-Mizil, Cristian and West, Robert and Jurafsky, Dan and Leskovec, Jure and Potts, Christopher},
  title     = {No country for old members: User lifecycle and linguistic change in online communities},
  booktitle = {Proceedings of the 22nd International Conference on World Wide Web},
  pages     = {307--318},
  year      = {2013},
}

% A second definition of chandrasekharan2018internet stood here and was removed: the
% key is already defined earlier in this file with the same fields, and a redefinition
% makes BibTeX report a repeated entry.

@inproceedings{jurgens2011word,
  author    = {Jurgens, David},
  title     = {Word sense induction by community detection},
  booktitle = {Proceedings of TextGraphs-6: Graph-based Methods for Natural Language Processing},
  pages     = {24--28},
  year      = {2011},
}

@article{del2018semantic,
  author  = {Del Tredici, Marco and Fern{\'a}ndez, Raquel},
  title   = {Semantic variation in online communities of practice},
  journal = {arXiv preprint arXiv:1806.05847},
  year    = {2018},
}

% NOTE(review): the publisher field holds a city and the journal field looks like a
% workshop name; values are kept as exported, verify against the original publication.
@article{yin2009detection,
  author    = {Yin, Dawei and Xue, Zhenzhen and Hong, Liangjie and Davison, Brian D and Kontostathis, April and Edwards, Lynne},
  title     = {Detection of harassment on {Web 2.0}},
  journal   = {Proceedings of the Content Analysis in the WEB},
  volume    = {2},
  pages     = {1--7},
  year      = {2009},
  publisher = {Madrid, Spain},
}

@article{al2016cybercrime,
  author    = {Al-Garadi, Mohammed Ali and Varathan, Kasturi Dewi and Ravana, Sri Devi},
  title     = {Cybercrime detection in online communications: The experimental case of cyberbullying detection in the {Twitter} network},
  journal   = {Computers in Human Behavior},
  volume    = {63},
  pages     = {433--443},
  year      = {2016},
  publisher = {Elsevier},
}

@inproceedings{davidson2017automated,
  author    = {Davidson, Thomas and Warmsley, Dana and Macy, Michael and Weber, Ingmar},
  title     = {Automated hate speech detection and the problem of offensive language},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {11},
  number    = {1},
  pages     = {512--515},
  year      = {2017},
}

@inproceedings{cheng2017anyone,
  author    = {Cheng, Justin and Bernstein, Michael and Danescu-Niculescu-Mizil, Cristian and Leskovec, Jure},
  title     = {Anyone can become a troll: Causes of trolling behavior in online discussions},
  booktitle = {Proceedings of the 2017 ACM Conference on Computer Supported Cooperative Work and Social Computing},
  pages     = {1217--1230},
  year      = {2017},
}

@inproceedings{kumar2017antisocial, title={Antisocial behavior on the web: 
Characterization and detection}, author={Kumar, Srijan and Cheng, Justin and Leskovec, Jure}, booktitle={Proceedings of the 26th International Conference on World Wide Web Companion}, pages={947--950}, year={2017} }

@inproceedings{liu2018forecasting,
  author    = {Liu, Ping and Guberman, Joshua and Hemphill, Libby and Culotta, Aron},
  title     = {Forecasting the presence and intensity of hostility on {Instagram} using linguistic and social features},
  booktitle = {Twelfth International AAAI Conference on Web and Social Media},
  year      = {2018},
}

@article{zhang2018conversations,
  author  = {Zhang, Justine and Chang, Jonathan P and Danescu-Niculescu-Mizil, Cristian and Dixon, Lucas and Hua, Yiqing and Thain, Nithum and Taraborelli, Dario},
  title   = {Conversations gone awry: Detecting early signs of conversational failure},
  journal = {arXiv preprint arXiv:1805.05345},
  year    = {2018},
}

% The export typed this as a journal article, but the journal field held a book title
% next to a book publisher. It is now a chapter in a collection.
@incollection{garside1997hybrid,
  author    = {Garside, Roger},
  title     = {A hybrid grammatical tagger: {CLAWS 4}},
  booktitle = {Corpus annotation: Linguistic information from computer text corpora},
  year      = {1997},
  publisher = {Longman},
}

@inproceedings{leech1994claws4,
  author    = {Leech, Geoffrey and Garside, Roger and Bryant, Michael},
  title     = {{CLAWS4}: the tagging of the {British National Corpus}},
  booktitle = {COLING 1994 Volume 1: The 15th International Conference on Computational Linguistics},
  year      = {1994},
}

% NOTE(review): the journal value may be an edited volume rather than a periodical;
% kept as exported because no publisher is recorded.
@article{wilson1993automatic,
  author  = {Wilson, Andrew and Rayson, Paul},
  title   = {Automatic content analysis of spoken discourse: a report on work in progress},
  journal = {Corpus based computational linguistics},
  pages   = {215--226},
  year    = {1993},
}

% No venue is recorded, so the entry is typed as misc instead of an article without
% a journal.
@misc{sharoff2006assist,
  author = {Sharoff, Serge and Babych, Bogdan and Rayson, Paul and Mudraya, Olga and Piao, Scott},
  title  = {{ASSIST}: Automated semantic assistance for translators},
  year   = {2006},
}

% The empty pages field was dropped and the month uses the standard macro.
@article{Ruan2022,
  author  = {Ruan, Tao and Kong, Qingkai and McBride, Sara and Sethjiwala, Amatullah and Lv, Qin},
  title   = {Cross-platform analysis of public responses to the 2019 {Ridgecrest} earthquake sequence on {Twitter} and {Reddit}},
  journal = {Scientific Reports},
  volume  = {12},
  year    = {2022},
  month   = jan,
  doi     = {10.1038/s41598-022-05359-9},
}

% The ampersand in the publisher is escaped, since a bare one aborts typesetting.
% The eprint field only repeated the DOI link and was dropped; padding spaces around
% the URL were removed; the "ABSTRACT" label fused onto the first word of the abstract
% was removed; Unicode dash and quotes became TeX ligatures.
@article{Hall2018,
  author    = {Hall, Margeret and Mazarakis, Athanasios and Chorley, Martin and Caton, Simon},
  title     = {Editorial of the Special Issue on Following User Pathways: Key Contributions and Future Directions in Cross-Platform Social Media Research},
  journal   = {International Journal of Human--Computer Interaction},
  volume    = {34},
  number    = {10},
  pages     = {895--912},
  year      = {2018},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/10447318.2018.1471575},
  url       = {https://doi.org/10.1080/10447318.2018.1471575},
  abstract  = {Social media and the resulting tidal wave of the available data have changed the ways and methods researchers analyze communities at scale. But the full potential for social scientists (and others) is not yet achieved. Despite the popularity of social media analysis in the past decade, few researchers invest in cross-platform analyses. This is a major oversight as a majority of online social media users have multiple social media accounts. Missing are the models and tools necessary to undertake analysis at scale across multiple platforms. Especially promising in support of cross-platform analysis is the mixed-method approach (e.g., qualitative and quantitative methods) in order to better understand how users and society interact online. This special issue ``Following User Pathways'' addresses methodological, analytical, conceptual, and technological challenges and opportunities of cross-platform analysis in social media ecosystems.},
}

@article{van2019echo,
  author  = {Van Raemdonck, Nathalie},
  title   = {The echo chamber of anti-vaccination conspiracies: mechanisms of radicalization on {Facebook} and {Reddit}},
  journal = {Institute for Policy, Advocacy and Governance (IPAG) Knowledge Series, Forthcoming},
  year    = {2019},
}

% The first author's surname particle had been exported as part of the given name;
% empty volume and number fields were dropped.
@inproceedings{Vicario2017,
  author    = {Del Vicario, Michela and Gaito, Sabrina and Quattrociocchi, Walter and Zignani, Matteo and Zollo, Fabiana},
  title     = {News Consumption during the {Italian} Referendum: A Cross-Platform Analysis on {Facebook} and {Twitter}},
  booktitle = {2017 IEEE International Conference on Data Science and Advanced Analytics (DSAA)},
  pages     = {648--657},
  year      = {2017},
  doi       = {10.1109/DSAA.2017.33},
}

% The pages field holds the article number as exported. The eprint field only repeated
% the DOI link and was dropped; Unicode dashes and quotes in the abstract became TeX
% ligatures.
@article{Yang2021,
  author   = {Yang, Kai-Cheng and Pierri, Francesco and Hui, Pik-Mai and Axelrod, David and Torres-Lugo, Christopher and Bryden, John and Menczer, Filippo},
  title    = {The {COVID-19} {Infodemic}: {Twitter} versus {Facebook}},
  journal  = {Big Data \& Society},
  volume   = {8},
  number   = {1},
  pages    = {20539517211013861},
  year     = {2021},
  doi      = {10.1177/20539517211013861},
  url      = {https://doi.org/10.1177/20539517211013861},
  abstract = {The global spread of the novel coronavirus is affected by the spread of related misinformation---the so-called COVID-19 Infodemic---that makes populations more vulnerable to the disease through resistance to mitigation efforts. Here, we analyze the prevalence and diffusion of links to low-credibility content about the pandemic across two major social media platforms, Twitter and Facebook. We characterize cross-platform similarities and differences in popular sources, diffusion patterns, influencers, coordination, and automation. Comparing the two platforms, we find divergence among the prevalence of popular low-credibility sources and suspicious videos. A minority of accounts and pages exert a strong influence on each platform. These misinformation ``superspreaders'' are often associated with the low-credibility sources and tend to be verified by the platforms. On both platforms, there is evidence of coordinated sharing of Infodemic content. The overt nature of this manipulation points to the need for societal-level solutions in addition to mitigation strategies within the platforms. However, we highlight limits imposed by inconsistent data-access policies on our capability to study harmful manipulations of information ecosystems.},
}

@inproceedings{tahmasbi2021go,
  author    = {Tahmasbi, Fatemeh and Schild, Leonard and Ling, Chen and Blackburn, Jeremy and Stringhini, Gianluca and Zhang, Yang and Zannettou, Savvas},
  title     = {``Go eat a bat, Chang!'': On the Emergence of Sinophobic Behavior on Web Communities in the Face of {COVID-19}},
  booktitle = {Proceedings of the Web Conference 2021},
  pages     = {1122--1133},
  year      = {2021},
}

% NOTE(review): title, authors, venue, volume and pages match icwsm21manosphere, which
% is defined earlier in this file under a different key; consider citing only one.
@inproceedings{ribeiro2021evolution,
  author    = {Ribeiro, Manoel Horta and Blackburn, Jeremy and Bradlyn, Barry and De Cristofaro, Emiliano and Stringhini, Gianluca and Long, Summer and Greenberg, Stephanie and Zannettou, Savvas},
  title     = {The evolution of the manosphere across the {Web}},
  booktitle = {Proceedings of the International AAAI Conference on Web and Social Media},
  volume    = {15},
  pages     = {196--207},
  year      = {2021},
}

@inproceedings{zannettou2018origins,
  author    = {Zannettou, Savvas and Caulfield, Tristan and Blackburn, Jeremy and De Cristofaro, Emiliano and Sirivianos, Michael and Stringhini, Gianluca and Suarez-Tangil, Guillermo},
  title     = {On the origins of memes by means of fringe web communities},
  booktitle = {Proceedings of the Internet Measurement Conference 2018},
  pages     = {188--202},
  year      = {2018},
}

@inproceedings{ali2021understanding, title={Understanding the effect of deplatforming on social networks}, author={Ali, Shiza and Saeed, Mohammad Hammas and Aldreabi, Esraa and Blackburn, Jeremy and De Cristofaro, Emiliano and Zannettou, Savvas and Stringhini, Gianluca}, booktitle={13th ACM Web 
Science Conference 2021}, pages={187--195}, year={2021} } @inproceedings{evans2007differential, title={Differential testing: a new approach to change detection}, author={Evans, Robert B and Savoia, Alberto}, booktitle={The 6th Joint Meeting on European software engineering conference and the ACM SIGSOFT Symposium on the Foundations of Software Engineering: Companion Papers}, pages={549--552}, year={2007} } @inproceedings{Si2022chat, author = {Si, Wai Man and Backes, Michael and Blackburn, Jeremy and De Cristofaro, Emiliano and Stringhini, Gianluca and Zannettou, Savvas and Zhang, Yang}, title = {Why So Toxic? Measuring and Triggering Toxic Behavior in Open-Domain Chatbots}, year = {2022}, isbn = {9781450394505}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3548606.3560599}, doi = {10.1145/3548606.3560599}, abstract = {Chatbots are used in many applications, e.g., automated agents, smart home assistants, interactive characters in online games, etc. Therefore, it is crucial to ensure they do not behave in undesired manners, providing offensive or toxic responses to users. This is not a trivial task as state-of-the-art chatbot models are trained on large, public datasets openly collected from the Internet. This paper presents a first-of-its-kind, large-scale measurement of toxicity in chatbots. We show that publicly available chatbots are prone to providing toxic responses when fed toxic queries. Even more worryingly, some non-toxic queries can trigger toxic responses too. We then set out to design and experiment with an attack, ToxicBuddy, which relies on fine-tuning GPT-2 to generate non-toxic queries that make chatbots respond in a toxic manner. Our extensive experimental evaluation demonstrates that our attack is effective against public chatbot models and outperforms manually-crafted malicious queries proposed by previous work. 
We also evaluate three defense mechanisms against ToxicBuddy, showing that they either reduce the attack performance at the cost of affecting the chatbot's utility or are only effective at mitigating a portion of the attack. This highlights the need for more research from the computer security and online safety communities to ensure that chatbot models do not hurt their users. Overall, we are confident that ToxicBuddy can be used as an auditing tool and that our work will pave the way toward designing more effective defenses for chatbot safety.}, booktitle = {Proceedings of the 2022 ACM SIGSAC Conference on Computer and Communications Security}, pages = {2659–2673}, numpages = {15}, keywords = {trustworthy machine learning, dialogue system, online toxicity}, location = {Los Angeles, CA, USA}, series = {CCS '22} } @article{ali2023instagram, author = {Ali, Shiza and Razi, Afsaneh and Kim, Seunghyun and Alsoubai, Ashwaq and Ling, Chen and De Choudhury, Munmun and Wisniewski, Pamela J. and Stringhini, Gianluca}, title = {Getting Meta: A Multimodal Approach for Detecting Unsafe Conversations within Instagram Direct Messages of Youth}, year = {2023}, issue_date = {April 2023}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {7}, number = {CSCW1}, url = {https://doi.org/10.1145/3579608}, doi = {10.1145/3579608}, abstract = {Instagram, one of the most popular social media platforms among youth, has recently come under scrutiny for potentially being harmful to the safety and well-being of our younger generations. Automated approaches for risk detection may be one way to help mitigate some of these risks if such algorithms are both accurate and contextual to the types of online harms youth face on social media platforms. However, the imminent switch by Instagram to end-to-end encryption for private conversations will limit the type of data that will be available to the platform to detect and mitigate such risks. 
In this paper, we investigate which indicators are most helpful in automatically detecting risk in Instagram private conversations, with an eye on high-level metadata, which will still be available in the scenario of end-to-end encryption. Toward this end, we collected Instagram data from 172 youth (ages 13-21) and asked them to identify private message conversations that made them feel uncomfortable or unsafe. Our participants risk-flagged 28,725 conversations that contained 4,181,970 direct messages, including textual posts and images. Based on this rich and multimodal dataset, we tested multiple feature sets (metadata, linguistic cues, and image features) and trained classifiers to detect risky conversations. Overall, we found that the metadata features (e.g., conversation length, a proxy for participant engagement) were the best predictors of risky conversations. However, for distinguishing between risk types, the different linguistic and media cues were the best predictors. Based on our findings, we provide design implications for AI risk detection systems in the presence of end-to-end encryption. More broadly, our work contributes to the literature on adolescent online safety by moving toward more robust solutions for risk detection that directly takes into account the lived risk experiences of youth.},
  journal = {Proceedings of the ACM on Human-Computer Interaction},
  month = apr,
  articleno = {132},
  numpages = {30},
  keywords = {social media, machine learning, online risk detection, instagram, ensemble models, end-to-end encryption}
}

% The entry above (ali2023instagram) now stores the full journal name and uses the
% bare month macro. Myatt2008badapple below: the title is no longer wrapped in a second
% brace pair (which disabled style casing), the month is the bare macro instead of a
% braced number, and the page range uses a double hyphen.
@article{Myatt2008badapple,
  author   = {Myatt, David P. and Wallace, Chris},
  title    = {When Does One Bad Apple Spoil the Barrel? An Evolutionary Analysis of Collective Action},
  journal  = {The Review of Economic Studies},
  volume   = {75},
  number   = {2},
  pages    = {499--527},
  year     = {2008},
  month    = apr,
  abstract = "{This paper studies collective-action games in which the production of a public good requires teamwork.
A leading example is a threshold game in which provision requires the voluntary participation of m out of n players. Quantal-response strategy revisions allow play to move between equilibria in which a team successfully provides, and an equilibrium in which the collective action fails. A full characterization of long-run play reveals the determinants of success; these include the correlation between players' costs of provision and their valuations for the good. The addition of an extra “bad apple” player can “spoil the barrel” by destabilizing successful teams and so offers a rationale for limiting the pool of possible contributors.}", issn = {0034-6527}, doi = {10.1111/j.1467-937X.2008.00482.x}, url = {https://doi.org/10.1111/j.1467-937X.2008.00482.x}, eprint = {https://academic.oup.com/restud/article-pdf/75/2/499/18350719/75-2-499.pdf}, }

% watson1973deindividualization: the article entry lacked its required journal field.
% Journal, volume, number and pages are filled in from the published record (reviewer:
% please confirm against the APA listing). The trailing period is removed from the title
% because styles add their own punctuation.
@article{watson1973deindividualization,
  title     = {Investigation into deindividuation using a cross-cultural survey technique},
  author    = {Watson, Robert I.},
  journal   = {Journal of Personality and Social Psychology},
  volume    = {25},
  number    = {3},
  pages     = {342--345},
  year      = {1973},
  publisher = {American Psychological Association},
}

% tajfel2004socialidentitytheory: chapter in the edited volume "Political Psychology".
@incollection{tajfel2004socialidentitytheory,
  title     = {The social identity theory of intergroup behavior},
  author    = {Tajfel, Henri and Turner, John C.},
  booktitle = {Political Psychology},
  pages     = {276--293},
  year      = {2004},
  publisher = {Psychology Press},
}

% rozado2023politicalcompass: the product name is brace-protected against sentence casing.
@Article{rozado2023politicalcompass, AUTHOR = {Rozado, David}, TITLE = {The Political Biases of {ChatGPT}}, JOURNAL = {Social Sciences}, VOLUME = {12}, YEAR = {2023}, NUMBER = {3}, ARTICLE-NUMBER = {148}, URL = {https://www.mdpi.com/2076-0760/12/3/148}, ISSN = {2076-0760}, ABSTRACT = {Recent advancements in Large Language Models (LLMs) suggest imminent commercial applications of such AI systems where they will serve as gateways to interact with technology and the accumulated body of human knowledge. The possibility of political biases embedded in these models raises concerns about their potential misusage.
In this work, we report the results of administering 15 different political orientation tests (14 in English, 1 in Spanish) to a state-of-the-art Large Language Model, the popular ChatGPT from OpenAI. The results are consistent across tests; 14 of the 15 instruments diagnose ChatGPT answers to their questions as manifesting a preference for left-leaning viewpoints. When asked explicitly about its political preferences, ChatGPT often claims to hold no political opinions and to just strive to provide factual and neutral information. It is desirable that public facing artificial intelligence systems provide accurate and factual information about empirically verifiable issues, but such systems should strive for political neutrality on largely normative questions for which there is no straightforward way to empirically validate a viewpoint. Thus, ethical AI systems should present users with balanced arguments on the issue at hand and avoid claiming neutrality while displaying clear signs of political bias in their content.}, DOI = {10.3390/socsci12030148} }

% agarwal2022graphnli: acronyms are brace-protected; the middle initial gets its period.
@inproceedings{agarwal2022graphnli,
  title     = {{GraphNLI}: A Graph-based Natural Language Inference Model for Polarity Prediction in Online Debates},
  author    = {Agarwal, Vibhor and Joglekar, Sagar and Young, Anthony P. and Sastry, Nishanth},
  booktitle = {Proceedings of the {ACM} Web Conference 2022},
  pages     = {2729--2737},
  year      = {2022},
}

% agarwal2022graph: an arXiv preprint. The identifier is stored in eprint/archivePrefix
% (the same shape as the other arXiv entries in this file) instead of being packed into
% a fake journal name.
@misc{agarwal2022graph,
  title         = {A Graph-Based Context-Aware Model to Understand Online Conversations},
  author        = {Agarwal, Vibhor and Young, Anthony P. and Joglekar, Sagar and Sastry, Nishanth},
  year          = {2022},
  eprint        = {2211.09207},
  archivePrefix = {arXiv},
}

@misc{zhu2023chatgpt, title={Can ChatGPT Reproduce Human-Generated Labels?
A Study of Social Computing Tasks}, author={Yiming Zhu and Peixian Zhang and Ehsan-Ul Haq and Pan Hui and Gareth Tyson}, year={2023}, eprint={2304.10145}, archivePrefix={arXiv}, primaryClass={cs.AI} } @inproceedings{si_why_2022, address = {Los Angeles CA USA}, title = {Why {So} {Toxic}?: {Measuring} and {Triggering} {Toxic} {Behavior} in {Open}-{Domain} {Chatbots}}, isbn = {978-1-4503-9450-5}, shorttitle = {Why {So} {Toxic}?}, url = {https://dl.acm.org/doi/10.1145/3548606.3560599}, doi = {10.1145/3548606.3560599}, abstract = {Chatbots are used in many applications, e.g., automated agents, smart home assistants, interactive characters in online games, etc. Therefore, it is crucial to ensure they do not behave in undesired manners, providing offensive or toxic responses to users. This is not a trivial task as state-of-the-art chatbot models are trained on large, public datasets openly collected from the Internet. This paper presents a first-of-its-kind, large-scale measurement of toxicity in chatbots. We show that publicly available chatbots are prone to providing toxic responses when fed toxic queries. Even more worryingly, some non-toxic queries can trigger toxic responses too. We then set out to design and experiment with an attack, ToxicBuddy, which relies on fine-tuning GPT-2 to generate non-toxic queries that make chatbots respond in a toxic manner. Our extensive experimental evaluation demonstrates that our attack is effective against public chatbot models and outperforms manually-crafted malicious queries proposed by previous work. We also evaluate three defense mechanisms against ToxicBuddy, showing that they either reduce the attack performance at the cost of affecting the chatbot’s utility or are only effective at mitigating a portion of the attack. This highlights the need for more research from the computer security and online safety communities to ensure that chatbot models do not hurt their users. 
Overall, we are confident that ToxicBuddy can be used as an auditing tool and that our work will pave the way toward designing more effective defenses for chatbot safety.}, language = {en}, urldate = {2023-03-17}, booktitle = {Proceedings of the 2022 {ACM} {SIGSAC} {Conference} on {Computer} and {Communications} {Security}}, publisher = {ACM}, author = {Si, Wai Man and Backes, Michael and Blackburn, Jeremy and De Cristofaro, Emiliano and Stringhini, Gianluca and Zannettou, Savvas and Zhang, Yang}, month = nov, year = {2022}, pages = {2659--2673}, } @inproceedings{abid_persistent_2021, address = {Virtual Event USA}, title = {Persistent {Anti}-{Muslim} {Bias} in {Large} {Language} {Models}}, isbn = {978-1-4503-8473-5}, url = {https://dl.acm.org/doi/10.1145/3461702.3462624}, doi = {10.1145/3461702.3462624}, abstract = {It has been observed that large-scale language models capture undesirable societal biases, e.g. relating to race and gender; yet religious bias has been relatively unexplored. We demonstrate that GPT-3, a state-of-the-art contextual language model, captures persistent Muslim-violence bias. We probe GPT-3 in various ways, including prompt completion, analogical reasoning, and story generation, to understand this anti-Muslim bias, demonstrating that it appears consistently and creatively in different uses of the model and that it is severe even compared to biases about other religious groups. For instance, “Muslim" is analogized to “terrorist" in 23\% of test cases, while “Jewish" is mapped to its most common stereotype, “money," in 5\% of test cases. 
We quantify the positive distraction needed to overcome this bias with adversarial text prompts, and find that use of the most positive 6 adjectives reduces violent completions for “Muslims" from 66\% to 20\%, but which is still higher than for other religious groups.}, language = {en}, urldate = {2023-03-17}, booktitle = {Proceedings of the 2021 {AAAI}/{ACM} {Conference} on {AI}, {Ethics}, and {Society}}, publisher = {ACM}, author = {Abid, Abubakar and Farooqi, Maheen and Zou, James}, month = jul, year = {2021}, pages = {298--306}, } @misc{blodgett_language_2020, title = {Language ({Technology}) is {Power}: {A} {Critical} {Survey} of "{Bias}" in {NLP}}, shorttitle = {Language ({Technology}) is {Power}}, url = {http://arxiv.org/abs/2005.14050}, abstract = {We survey 146 papers analyzing “bias” in NLP systems, finding that their motivations are often vague, inconsistent, and lacking in normative reasoning, despite the fact that analyzing “bias” is an inherently normative process. We further find that these papers’ proposed quantitative techniques for measuring or mitigating “bias” are poorly matched to their motivations and do not engage with the relevant literature outside of NLP. Based on these findings, we describe the beginnings of a path forward by proposing three recommendations that should guide work analyzing “bias” in NLP systems. 
These recommendations rest on a greater recognition of the relationships between language and social hierarchies, encouraging researchers and practitioners to articulate their conceptualizations of “bias”—i.e., what kinds of system behaviors are harmful, in what ways, to whom, and why, as well as the normative reasoning underlying these statements—and to center work around the lived experiences of members of communities affected by NLP systems, while interrogating and reimagining the power relations between technologists and such communities.},
  language = {en},
  urldate = {2023-03-19},
  publisher = {arXiv},
  author = {Blodgett, Su Lin and Barocas, Solon and Daumé, III, Hal and Wallach, Hanna},
  month = may,
  year = {2020},
  eprint = {2005.14050},
  archivePrefix = {arXiv},
  keywords = {Computer Science - Computers and Society, Computer Science - Computation and Language},
}

% In the entry above (blodgett_language_2020) the third author uses the
% "Last, Suffix, First" form so that "III" is parsed as a generational suffix rather
% than as part of the surname, and the arXiv identifier lives in eprint/archivePrefix
% instead of a free-text note.
@misc{barikeri_redditbias_2021, title = {{RedditBias}: {A} {Real}-{World} {Resource} for {Bias} {Evaluation} and {Debiasing} of {Conversational} {Language} {Models}}, shorttitle = {{RedditBias}}, url = {http://arxiv.org/abs/2106.03521}, abstract = {Text representation models are prone to exhibit a range of societal biases, reflecting the noncontrolled and biased nature of the underlying pretraining data, which consequently leads to severe ethical issues and even bias amplification. Recent work has predominantly focused on measuring and mitigating bias in pretrained language models. Surprisingly, the landscape of bias measurements and mitigation resources and methods for conversational language models is still very scarce: it is limited to only a few types of bias, artificially constructed resources, and completely ignores the impact that debiasing methods may have on the final performance in dialog tasks, e.g., conversational response generation.
In this work, we present REDDITBIAS, the first conversational data set grounded in the actual human conversations from Reddit, allowing for bias measurement and mitigation across four important bias dimensions: gender, race, religion, and queerness. Further, we develop an evaluation framework which simultaneously 1) measures bias on the developed REDDITBIAS resource, and 2) evaluates model capability in dialog tasks after model debiasing. We use the evaluation framework to benchmark the widely used conversational DialoGPT model along with the adaptations of four debiasing methods. Our results indicate that DialoGPT is biased with respect to religious groups and that some debiasing techniques can remove this bias while preserving downstream task performance.}, language = {en}, urldate = {2023-05-24}, publisher = {arXiv}, author = {Barikeri, Soumya and Lauscher, Anne and Vulić, Ivan and Glavaš, Goran}, month = jun, year = {2021}, note = {arXiv:2106.03521 [cs]}, keywords = {Computer Science - Computation and Language}, } @article{lee_exploring_2019, title = {Exploring {Social} {Bias} in {Chatbots} using {Stereotype} {Knowledge}}, abstract = {Exploring social bias in chatbots is an important, yet relatively unexplored problem. In this paper, we propose an approach to understand social bias in chatbots by leveraging stereotype knowledge. 
It allows interesting comparison of bias between chatbots and humans, and provides intuitive analysis of existing chatbots by borrowing the finer-grain concepts of sexism and racism.}, language = {en}, author = {Lee, Nayeon and Madotto, Andrea and Fung, Pascale}, year = {2019}, }

% feine2020gender: Springer is the publisher of the proceedings volume, so it is stored
% in publisher (organization is meant for a sponsoring body).
@inproceedings{feine2020gender,
  title     = {Gender bias in chatbot design},
  author    = {Feine, Jasper and Gnewuch, Ulrich and Morana, Stefan and Maedche, Alexander},
  booktitle = {Chatbot Research and Design: Third International Workshop, CONVERSATIONS 2019, Amsterdam, The Netherlands, November 19--20, 2019, Revised Selected Papers 3},
  pages     = {79--93},
  year      = {2020},
  publisher = {Springer},
}

% silva2019algorithms: publisher name and publisher city are split into their own fields.
@article{silva2019algorithms,
  title     = {Algorithms, platforms, and ethnic bias},
  author    = {Silva, Selena and Kenney, Martin},
  journal   = {Communications of the ACM},
  volume    = {62},
  number    = {11},
  pages     = {37--39},
  year      = {2019},
  publisher = {ACM},
  address   = {New York, NY, USA},
}

% lawrence2023hidden: the acronym is brace-protected; the middle initial gets its period.
@book{lawrence2023hidden,
  title     = {Hidden in White Sight: How {AI} Empowers and Deepens Systemic Racism},
  author    = {Lawrence, Calvin D.},
  year      = {2023},
  publisher = {CRC Press},
}

% solon_barocas_problem_2017: a SIGCIS conference contribution. The authors are personal
% names, so they are written "Last, First" instead of being wrapped in braces (which made
% each full name sort and print as one indivisible surname). The venue is stored as
% booktitle of an inproceedings entry, and the per-word title braces from the reference
% manager export are removed.
@inproceedings{solon_barocas_problem_2017,
  title     = {The Problem With Bias: Allocative Versus Representational Harms in Machine Learning},
  author    = {Barocas, Solon and Crawford, Kate and Shapiro, Aaron and Wallach, Hanna},
  booktitle = {Proceedings of {SIGCIS}, Philadelphia, PA},
  year      = {2017},
}

@article{frick2022transformer, title={Fraunhofer SIT at CheckThat!
2022: ensemble similarity estimation for finding previously fact-checked claims}, author={Frick, Raphael Antonius and Vogel, Inna}, journal={Working Notes of CLEF}, year={2022} }

% siddique2022transformer: author initials are separated so they parse as two initials.
@inproceedings{siddique2022transformer,
  title     = {Personalizing Task-oriented Dialog Systems via Zero-shot Generalizable Reward Function},
  author    = {Siddique, A. B. and Maqbool, M. H. and Taywade, Kshitija and Foroosh, Hassan},
  booktitle = {Proceedings of the 31st {ACM} International Conference on Information \& Knowledge Management},
  pages     = {1787--1797},
  year      = {2022},
}

% shen2023chatgpt: arXiv preprint; the product name is protected against sentence casing.
@misc{shen2023chatgpt,
  title         = {In {ChatGPT} We Trust? Measuring and Characterizing the Reliability of {ChatGPT}},
  author        = {Shen, Xinyue and Chen, Zeyuan and Backes, Michael and Zhang, Yang},
  year          = {2023},
  eprint        = {2304.08979},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CR},
}

% ali2023chatgpt: the journal name is stored in title case.
@article{ali2023chatgpt,
  title   = {{ChatGPT} and Lacrimal Drainage Disorders: Performance and Scope of Improvement},
  author  = {Ali, M. J.},
  journal = {Ophthalmic Plastic and Reconstructive Surgery},
  volume  = {39},
  number  = {3},
  pages   = {221--225},
  year    = {2023},
  doi     = {10.1097/IOP.0000000000002418},
}

% TheStar2023WokeAI / Verge2023WokeAI: news articles. The unfilled template text
% "Accessed on Month Day, Year" is removed because it would be printed verbatim in the
% bibliography; add urldate (or a real note) once the access date is known. Months use
% the bare three-letter macros and quotation marks use LaTeX quote syntax.
@misc{TheStar2023WokeAI,
  author       = {Jiang, Kevin},
  title        = {What is `woke {AI}' and why is {Elon Musk} reportedly building a chatbot to counter it?},
  howpublished = {TheStar},
  year         = {2023},
  month        = mar,
  day          = {2},
  url          = {https://www.thestar.com/business/2023/03/01/what-is-woke-ai-and-why-is-elon-musk-reportedly-building-a-chatbot-to-counter-it.html},
}

@misc{Verge2023WokeAI,
  author       = {Vincent, James},
  title        = {As conservatives criticize `woke {AI},' here are {ChatGPT}'s rules for answering culture war queries},
  howpublished = {The Verge},
  year         = {2023},
  month        = feb,
  day          = {17},
  url          = {https://www.theverge.com/2023/2/17/23603906/openai-chatgpt-woke-criticism-culture-war-rules},
}

@article{beck_managing_2019, title = {Managing conflict in online debate communities}, volume = {24}, shorttitle = {Managing {Conflict} in {Online}
{Debate} {Communities}},
  abstract = {Kialo is a novel peer production system focused on pro/con debate construction. Teams of moderators vet and accept claims submitted by writers. Moderators also edit and refactor debates as they grow. Thus, moderators play a critical role in cultivating and maintaining debates. Conflict between moderators is typical. It is a feature of argumentation and debate. However, not all conflict is productive. Conflict between moderators can undermine collaboration (by distracting from the task of managing debates) and drive attrition (by discouraging participation on the site altogether). Based on a ten-month participant observation on Kialo, we identify a common source of conflict between moderators: adversarial beliefs and values. Moderators are not neutral participants on Kialo. They take positions on debate topics. We suggest foregrounding these positions, which are potential sources of conflict, through interface design as a scalable solution to conflict management.},
  language = {en},
  number = {7},
  urldate = {2023-06-02},
  journal = {First Monday},
  author = {Beck, Jordan and Neupane, Bikalpa and Carroll, John M.},
  month = jun,
  year = {2019},
  doi = {10.5210/fm.v24i7.9585},
}

% In the entry above (beck_managing_2019) the doi field holds the bare DOI; styles
% prepend the resolver themselves, so a stored https://doi.org/ prefix would yield a
% doubled, broken link.
@misc{pew-research, author = "{Pew Research Center—U.S.
Politics \& Policy (blog)}", title = "Political Typology Quiz", howpublished = "Available online", year = "n.d.", url = "https://www.pewresearch.org/politics/quiz/political-typology/", } @misc{eightvaluestest, author = "{IDRlabs}", title = "8 Values Political Test", howpublished = "Available online", year = "n.d.", url = "https://www.idrlabs.com/8-values-political/test.php", } @misc{political-compass, author = "{The Political Compass}", title = "Political Compass Test", howpublished = "Available online", year = "n.d.", url = "https://www.politicalcompass.org/test", } @inproceedings{Zhou2020allsides, author = {Zhou, Xinyi and Mulay, Apurva and Ferrara, Emilio and Zafarani, Reza}, title = {ReCOVery: A Multimodal Repository for COVID-19 News Credibility Research}, year = {2020}, isbn = {9781450368599}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3340531.3412880}, doi = {10.1145/3340531.3412880}, abstract = {First identified in Wuhan, China, in December 2019, the outbreak of COVID-19 has been declared as a global emergency in January, and a pandemic in March 2020 by the World Health Organization (WHO). Along with this pandemic, we are also experiencing an "infodemic" of information with low credibility such as fake news and conspiracies. In this work, we present ReCOVery, a repository designed and constructed to facilitate research on combating such information regarding COVID-19. We first broadly search and investigate ~2,000 news publishers, from which 60 are identified with extreme [high or low] levels of credibility. By inheriting the credibility of the media on which they were published, a total of 2,029 news articles on coronavirus, published from January to May 2020, are collected in the repository, along with 140,820 tweets that reveal how these news articles have spread on the Twitter social network. 
The repository provides multimodal information of news articles on coronavirus, including textual, visual, temporal, and network information. The way that news credibility is obtained allows a trade-off between dataset scalability and label accuracy. Extensive experiments are conducted to present data statistics and distributions, as well as to provide baseline performances for predicting news credibility so that future methods can be compared. Our repository is available at http://coronavirus-fakenews.com.}, booktitle = {Proceedings of the 29th ACM International Conference on Information \& Knowledge Management}, pages = {3205–3212}, numpages = {8}, keywords = {fake news, infodemic, repository, social media, information credibility, coronavirus, multimodal, covid-19, pandemic}, location = {Virtual Event, Ireland}, series = {CIKM '20} } @inproceedings{Deb2019allsides, author = {Deb, Ashok and Luceri, Luca and Badaway, Adam and Ferrara, Emilio}, title = {Perils and Challenges of Social Media and Election Manipulation Analysis: The 2018 US Midterms}, year = {2019}, isbn = {9781450366755}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3308560.3316486}, doi = {10.1145/3308560.3316486}, abstract = {One of the hallmarks of a free and fair society is the ability to conduct a peaceful and seamless transfer of power from one leader to another. Democratically, this is measured in a citizen population’s trust in the electoral system of choosing a representative government. In view of the well documented issues of the 2016 US Presidential election, we conducted an in-depth analysis of the 2018 US Midterm elections looking specifically for voter fraud or suppression. The Midterm election occurs in the middle of a 4 year presidential term. 
For the 2018 midterms, 35 Senators and all the 435 seats in the House of Representatives were up for re-election, thus, every congressional district and practically every state had a federal election. In order to collect election related tweets, we analyzed Twitter during the month prior to, and the two weeks following, the November 6, 2018 election day. In a targeted analysis to detect statistical anomalies or election interference, we identified several biases that can lead to wrong conclusions. Specifically, we looked for divergence between actual voting outcomes and instances of the #ivoted hashtag on the election day. This analysis highlighted three states of concern: New York, California, and Texas. We repeated our analysis discarding malicious accounts, such as social bots. Upon further inspection and against a backdrop of collected general election-related tweets, we identified some confounding factors, such as population bias, or bot and political ideology inference, that can lead to false conclusions. We conclude by providing an in-depth discussion of the perils and challenges of using social media data to explore questions about election manipulation.}, booktitle = {Companion Proceedings of The 2019 World Wide Web Conference}, pages = {237–247}, numpages = {11}, keywords = {political elections, data science for society, social media}, location = {San Francisco, USA}, series = {WWW '19} } @inproceedings{Ye2019allsides, author = {Ye, Junting and Skiena, Steven}, title = {MediaRank: Computational Ranking of Online News Sources}, year = {2019}, isbn = {9781450362016}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3292500.3330709}, doi = {10.1145/3292500.3330709}, abstract = {In the recent political climate, the topic of news quality has drawn attention both from the public and the academic communities. 
The growing distrust of traditional news media makes it harder to find a common base of accepted truth. In this work, we design and build MediaRank (urlwww.media-rank.com ), a fully automated system to rank over 50,000 online news sources around the world. MediaRank collects and analyzes one million news webpages and two million related tweets everyday. We base our algorithmic analysis on four properties journalists have established to be associated with reporting quality: peer reputation, reporting bias/breadth, bottomline financial pressure, and popularity. Our major contributions of this paper include: (i) Open, interpretable quality rankings for over 50,000 of the world's major news sources. Our rankings are validated against 35 published news rankings, including French, German, Russian, and Spanish language sources. MediaRank scores correlate positively with 34 of 35 of these expert rankings. (ii) New computational methods for measuring influence and bottomline pressure. To the best of our knowledge, we are the first to study the large-scale news reporting citation graph in-depth. We also propose new ways to measure the aggressiveness of advertisements and identify social bots, establishing a connection between both types of bad behavior. (iii) Analyzing the effect of media source bias and significance. We prove that news sources cite others despite different political views in accord with quality measures. 
However, in four English-speaking countries (US, UK, Canada, and Australia), the highest ranking sources all disproportionately favor left-wing parties, even when the majority of news sources exhibited conservative slants.}, booktitle = {Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining}, pages = {2469--2477}, numpages = {9}, keywords = {online news ranking, mediarank, news citation network/graph}, location = {Anchorage, AK, USA}, series = {KDD '19} } @inproceedings{Fourney2018gunningfog, author = {Fourney, Adam and Ringel Morris, Meredith and Ali, Abdullah and Vonessen, Laura}, title = {Assessing the Readability of Web Search Results for Searchers with Dyslexia}, year = {2018}, isbn = {9781450356572}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3209978.3210072}, doi = {10.1145/3209978.3210072}, abstract = {Standards organizations, (e.g., the World Wide Web Consortium), are placing increased importance on the cognitive accessibility of online systems, including web search. Previous work has shown an association between query-document relevance judgments, and query-independent assessments of document readability. In this paper, we study the lexical and aesthetic features of web documents that may underlie this relationship. Leveraging a data set consisting of relevance and readability judgments for 200 web pages as assessed by 174 adults with dyslexia and 172 adults without dyslexia, we answer the following research questions: (1) Which web page features are most associated with readability? (2) To what extent are these features also associated with relevance? And, (3) are any features associated with the differences in readability/relevance judgments observed between dyslexic and non-dyslexic populations?
Our findings have implications for improving the cognitive accessibility of search systems and web documents.}, booktitle = {The 41st International ACM SIGIR Conference on Research \& Development in Information Retrieval}, pages = {1069--1072}, numpages = {4}, keywords = {dyslexia, web search, readability}, location = {Ann Arbor, MI, USA}, series = {SIGIR '18} } @article{Bothun2022gunningfog, title = {Readability of {COVID-19} vaccine information for the general public}, journal = {Vaccine}, volume = {40}, number = {25}, pages = {3466--3469}, year = {2022}, issn = {0264-410X}, doi = {10.1016/j.vaccine.2022.04.096}, url = {https://www.sciencedirect.com/science/article/pii/S0264410X22005461}, author = {Luke S. Bothun and Scott E. Feeder and Gregory A. Poland}, keywords = {Research, Reading, Research Subjects, Consent Forms, Clinical Trial, Coronavirus Infections, COVID-19, Vaccine, Comprehension}, abstract = {Background More than 130 million individuals in the United States have now received at least one dose of a COVID-19 vaccine. Currently, all adults in the United States now have access to one of three COVID-19 vaccines. As part of the vaccination procedure, Emergency Use Authorization (EUA) fact sheets, which contain information regarding the vaccine, are provided. The purpose of this study was to analyze the ease of reading (i.e., readability) of the EUA-approved fact sheets for the vaccines currently available in the United States, the V-Safe adverse event survey script, and the Centers for Disease Control and Prevention (CDC) website information on COVID-19 vaccines designed for the general public in the United States. Methods We acquired the Pfizer, Moderna, and Janssen EUA fact sheets, as well as the V-Safe survey script and the CDC website information regarding COVID-19 vaccines.
These documents were analyzed for their complexity regarding the following readability factors: average length of paragraphs, sentences, and words; font size and style; use of passive voice; the Gunning-Fog index; the Flesch Reading Ease index; and the Flesch-Kincaid Grade Level index. Results Only the V-Safe adverse-event survey script met readability standards for adequate comprehension. The mean readability scores of the EUA fact sheets and the CDC website were as follows: Flesch Reading Ease score (44.35 avg); Flesch-Kincaid Grade Level (10.48 avg); and Gunning-Fog index (11.8 avg). These scores indicate that at least a 10th-grade level education would be required to understand these reading materials. Conclusion The average person in the United States would have difficulty understanding the information provided in the EUA fact sheets and CDC COVID-19 vaccine website documents; however, the V-Safe survey was written at an adequate reading level. To ensure that the general public fully understands information regarding COVID-19 vaccines, greater care and effort should be given to the development of simplified information material.} } @article{Suleiman2016gunningfog, author = {Ahna Ballonoff Suleiman and Jessica S. Lin and Norman A. Constantine}, title = {Readability of Educational Materials to Support Parent Sexual Communication With Their Children and Adolescents}, journal = {Journal of Health Communication}, volume = {21}, number = {5}, pages = {534--543}, year = {2016}, publisher = {Taylor \& Francis}, doi = {10.1080/10810730.2015.1103334}, note = {PMID: 27116292}, url = {https://doi.org/10.1080/10810730.2015.1103334} } @inproceedings{ghafouri2023kialo, author = {Ghafouri, Vahid and Agarwal, Vibhor and Zhang, Yong and Sastry, Nishanth and Such, Jose and Suarez-Tangil, Guillermo}, title = {{AI} in the Gray: Exploring Moderation Policies in Dialogic Large Language Models vs.
Human Answers in Controversial Topics}, year = {2023}, isbn = {9798400701245}, url = {https://doi.org/10.1145/3583780.3614777}, doi = {10.1145/3583780.3614777}, abstract = {The introduction of ChatGPT and the subsequent improvement of Large Language Models (LLMs) have prompted more and more individuals to turn to the use of ChatBots, both for information and assistance with decision-making. However, the information the user is after is often not formulated by these ChatBots objectively enough to be provided with a definite, globally accepted answer. Controversial topics, such as "religion", "gender identity", "freedom of speech", and "equality", among others, can be a source of conflict as partisan or biased answers can reinforce preconceived notions or promote disinformation. By exposing ChatGPT to such debatable questions, we aim to understand its level of awareness and if existing models are subject to socio-political and/or economic biases. We also aim to explore how AI-generated answers compare to human ones. For exploring this, we use a dataset of a social media platform created for the purpose of debating human-generated claims on polemic subjects among users, dubbed Kialo. Our results show that while previous versions of ChatGPT have had important issues with controversial topics, more recent versions of ChatGPT (gpt-3.5-turbo) are no longer manifesting significant explicit biases in several knowledge areas. In particular, it is well-moderated regarding economic aspects. However, it still maintains degrees of implicit libertarian leaning toward right-winged ideals which suggest the need for increased moderation from the socio-political point of view. In terms of domain knowledge on controversial topics, with the exception of the "Philosophical" category, ChatGPT is performing well in keeping up with the collective human level of knowledge. Finally, we see that sources of Bing AI have slightly more tendency to the center when compared to human answers. 
All the analyses we make are generalizable to other types of biases and domains.}, booktitle = {Proceedings of the 32nd ACM International Conference on Information and Knowledge Management}, pages = {556--565}, numpages = {10}, keywords = {sentence transformers, controversial topics, NLP, Kialo, ChatGPT, AI bias}, location = {Birmingham, United Kingdom}, series = {CIKM '23}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA} } @article{ghafouri2024echo, author = {Ghafouri, Vahid and Alatawi, Faisal and Karami, Mansooreh and Such, Jose and Suarez-Tangil, Guillermo}, title = {Transformer-Based Quantification of the Echo Chamber Effect in Online Communities}, year = {2024}, issue_date = {November 2024}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {8}, number = {CSCW2}, url = {https://doi.org/10.1145/3687006}, doi = {10.1145/3687006}, abstract = {An Echo Chamber on social media refers to the environment where like-minded people hear the echo of each others' voices, opinions, or beliefs, which reinforce their own. Echo Chambers can turn social media platforms into collaborative venues that polarize and radicalize users rather than broadening their exposure to diverse information. Having a quantified metric for measuring the Echo Chamber effect can aid moderators and policymakers in tracking and mitigating online polarization and radicalization. Existing methods for Echo Chamber detection are either one-dimensional, only considering the network behavior of users while ignoring their semantic behavior, or require demanding supervised labeling, which is both expensive and less generalizable. This paper proposes a new metric to quantify the Echo Chamber effect using Transformer models for context-sensitive processing of natural language (NLP). Our metric quantifies (1) the effect of an Echo Chamber through the inverse effect of user diversity, and (2) polarization by means of user separability between two Echo Chambers in a topic.
Leveraging this metric, we further propose an NLP-based embedding that represents the users' activity. Our model is simultaneously effective, computationally cheap, and unsupervised. As our method is unsupervised, it makes existing collaborative moderation efforts to thwart Echo Chamber effects more efficient by addressing the problem of identifying narrow information bases for algorithmic biases and misinformation detection. We run our analysis on three recent highly controversial political topics and a non-controversial topic: Russo-Ukrainian War, Abortion, Gun-Control, and SXSW music festival. Our results offer data-driven findings such as a higher Echo Chamber effect among Republicans over Democrats and diverse explicit support for Ukraine, especially among Democrats. We also observe a direct relationship between the Echo Chamber effect and polarization while observing that the low Echo Chamber effect for the Russo-Ukraine war is accompanied by a low polarization; and vice versa for Gun-Control.}, journal = {Proc. ACM Hum.-Comput. Interact.}, month = nov, articleno = {467}, numpages = {27}, keywords = {NLP, echo chambers, polarization, sentence transformers, social networks} } @misc{Lingam2018semeval2014, author = {Vijay Lingam and Simran Bhuria and Mayukh Nair and Divij Gurpreetsingh and Anjali Goyal and Ashish Sureka}, title = {Dataset for Conflicting Statements Detection in Text}, year = {2018}, month = feb, howpublished = {figshare}, url = {https://figshare.com/articles/dataset/Dataset_for_Conflicting_Statements_Detection_in_Text/5873823}, doi = {10.6084/m9.figshare.5873823.v1} } @misc{hu2021lora, title={{LoRA}: Low-Rank Adaptation of Large Language Models}, author={Edward J.
Hu and Yelong Shen and Phillip Wallis and Zeyuan Allen-Zhu and Yuanzhi Li and Shean Wang and Lu Wang and Weizhu Chen}, year={2021}, eprint={2106.09685}, archivePrefix={arXiv}, primaryClass={cs.CL} } @misc{wang2023r32, title={{MultiLoRA}: Democratizing {LoRA} for Better Multi-Task Learning}, author={Wang, Yiming and Lin, Yu and Zeng, Xiaodong and Zhang, Guannan}, year={2023} } @misc{liu2023r32, title={Sparsely Shared {LoRA} on {Whisper} for Child Speech Recognition}, author={Liu, Wei and Qin, Ying and Peng, Zhiyuan and Lee, Tan}, year={2023} } @inproceedings{marelli2014semeval, title = "{S}em{E}val-2014 Task 1: Evaluation of Compositional Distributional Semantic Models on Full Sentences through Semantic Relatedness and Textual Entailment", author = "Marelli, Marco and Bentivogli, Luisa and Baroni, Marco and Bernardi, Raffaella and Menini, Stefano and Zamparelli, Roberto", editor = "Nakov, Preslav and Zesch, Torsten", booktitle = "Proceedings of the 8th International Workshop on Semantic Evaluation ({S}em{E}val 2014)", month = aug, year = "2014", address = "Dublin, Ireland", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/S14-2001", doi = "10.3115/v1/S14-2001", pages = "1--8", } @inproceedings{Fang2020DistributionShift, author = {Fang, Tongtong and Lu, Nan and Niu, Gang and Sugiyama, Masashi}, booktitle = {Advances in Neural Information Processing Systems}, editor = {H. Larochelle and M. Ranzato and R. Hadsell and M. F. Balcan and H.
Lin}, pages = {11996--12007}, publisher = {Curran Associates, Inc.}, title = {Rethinking Importance Weighting for Deep Learning under Distribution Shift}, url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/8b9e7ab295e87570551db122a04c6f7c-Paper.pdf}, volume = {33}, year = {2020} } @InProceedings{zhai2024CatastrophicForgetting, title = {Investigating the Catastrophic Forgetting in Multimodal Large Language Model Fine-Tuning}, author = {Zhai, Yuexiang and Tong, Shengbang and Li, Xiao and Cai, Mu and Qu, Qing and Lee, Yong Jae and Ma, Yi}, booktitle = {Conference on Parsimony and Learning}, pages = {202--227}, year = {2024}, editor = {Chi, Yuejie and Dziugaite, Gintare Karolina and Qu, Qing and Wang, Atlas Wang and Zhu, Zhihui}, volume = {234}, series = {Proceedings of Machine Learning Research}, month = jan, eventdate = {2024-01-03/2024-01-06}, publisher = {PMLR}, pdf = {https://proceedings.mlr.press/v234/zhai24a/zhai24a.pdf}, url = {https://proceedings.mlr.press/v234/zhai24a.html}, abstract = {Following the success of GPT4, there has been a surge in interest in multimodal large language model (MLLM) research. This line of research focuses on developing general-purpose LLMs through fine-tuning pre-trained LLMs and vision models. However, catastrophic forgetting, a notorious phenomenon where the fine-tuned model fails to retain similar performance compared to the pre-trained model, still remains an inherited problem in multimodal LLMs (MLLM). In this paper, we introduce EMT: Evaluating MulTimodality for evaluating the catastrophic forgetting in MLLMs, by treating each MLLM as an image classifier. We first apply EMT to evaluate several open-source fine-tuned MLLMs and we discover that almost all evaluated MLLMs fail to retain the same performance levels as their vision encoders on standard image classification tasks. Moreover, we continue fine-tuning LLaVA, an MLLM and utilize EMT to assess performance throughout the fine-tuning.
Interestingly, our results suggest that early-stage fine-tuning on an image dataset improves performance across other image datasets, by enhancing the alignment of text and language features. However, as fine-tuning proceeds, the MLLMs begin to hallucinate, resulting in a significant loss of generalizability, even when the image encoder remains frozen. Our results suggest that MLLMs have yet to demonstrate performance on par with their vision models on standard image classification tasks and the current MLLM fine-tuning procedure still has room for improvement.} } @article{Introne2023sbertproblem, title={Measuring Belief Dynamics on {Twitter}}, volume={17}, url={https://ojs.aaai.org/index.php/ICWSM/article/view/22154}, DOI={10.1609/icwsm.v17i1.22154}, abstractNote={There is growing concern about misinformation and the role online media plays in social polarization. Analyzing belief dynamics is one way to enhance our understanding of these problems. Existing analytical tools, such as survey research or stance detection, lack the power to correlate contextual factors with population-level changes in belief dynamics. In this exploratory study, I present the Belief Landscape Framework, which uses data about people’s professed beliefs in an online setting to measure belief dynamics with more temporal granularity than previous methods. I apply the approach to conversations about climate change on Twitter and provide initial validation by comparing the method’s output to a set of hypotheses drawn from the literature on dynamic systems. My analysis indicates that the method is relatively robust to different parameter settings, and results suggest that 1) there are many stable configurations of belief on the polarizing issue of climate change and 2) that people move in predictable ways around these points.
The method paves the way for more powerful tools that can be used to understand how the modern digital media ecosystem impacts collective belief dynamics and what role misinformation plays in that process.}, number={1}, journal={Proceedings of the International AAAI Conference on Web and Social Media}, author={Introne, Joshua}, year={2023}, month=jun, pages={387--398} } @article{Iqbal2023Nextdoor, title={Lady and the Tramp Nextdoor: Online Manifestations of Real-World Inequalities in the Nextdoor Social Network}, volume={17}, url={https://ojs.aaai.org/index.php/ICWSM/article/view/22155}, DOI={10.1609/icwsm.v17i1.22155}, abstractNote={From health to education, income impacts a huge range of life choices. Earlier research has leveraged data from online social networks to study precisely this impact. In this paper, we ask the opposite question: do different levels of income result in different online behaviors? We demonstrate it does. We present the first large-scale study of Nextdoor, a popular location-based social network. We collect 2.6 Million posts from 64,283 neighborhoods in the United States and 3,325 neighborhoods in the United Kingdom, to examine whether online discourse reflects the income and income inequality of a neighborhood. We show that posts from neighborhoods with different incomes indeed differ, e.g. richer neighborhoods have a more positive sentiment and discuss crimes more, even though their actual crime rates are much lower. We then show that user-generated content can predict both income and inequality.
We train multiple machine learning models and predict both income (R2=0.841) and inequality (R2=0.77).}, number={1}, journal={Proceedings of the International AAAI Conference on Web and Social Media}, author={Iqbal, Waleed and Ghafouri, Vahid and Tyson, Gareth and Suarez-Tangil, Guillermo and Castro, Ignacio}, year={2023}, month=jun, pages={399--410} } @InProceedings{Hazra2023pkddsbert, author="Hazra, Rima and Dwivedi, Arpit and Mukherjee, Animesh", editor="Amini, Massih-Reza and Canu, St{\'e}phane and Fischer, Asja and Guns, Tias and Kralj Novak, Petra and Tsoumakas, Grigorios", title="Is This Bug Severe? A Text-Cum-Graph Based Model for Bug Severity Prediction", booktitle="Machine Learning and Knowledge Discovery in Databases", year="2023", publisher="Springer Nature Switzerland", address="Cham", pages="236--252", abstract="Repositories of large software systems have become commonplace. This massive expansion has resulted in the emergence of various problems in these software platforms including identification of (i) bug-prone packages, (ii) critical bugs, and (iii) severity of bugs. One of the important goals would be to mine these bugs and recommend them to the developers to resolve them. The first step to this is that one has to accurately detect the extent of severity of the bugs. In this paper, we take up this task of predicting the severity of bugs in the near future. Contextualized neural models built on the text description of a bug and the user comments about the bug help to achieve reasonably good performance.
Further information on how the bugs are related to each other in terms of the ways they affect packages can be summarised in the form of a graph and used along with the text to get additional benefits.", isbn="978-3-031-26422-1" } @InProceedings{Upadhyay2023bertrerankers, author="Upadhyay, Rishabh and Pasi, Gabriella and Viviani, Marco", editor="Koutra, Danai and Plant, Claudia and Gomez Rodriguez, Manuel and Baralis, Elena and Bonchi, Francesco", title="A Passage Retrieval Transformer-Based Re-Ranking Model for Truthful Consumer Health Search", booktitle="Machine Learning and Knowledge Discovery in Databases: Research Track", year="2023", publisher="Springer Nature Switzerland", address="Cham", pages="355--371", abstract="Searching for online information is nowadays a critical task in a scenario characterized by information overload and misinformation. To address these issues, it is necessary to provide users with both topically relevant and truthful information. Re-ranking is a strategy often used in Information Retrieval (IR) to consider multiple dimensions of relevance. However, re-rankers often analyze the full text of documents to obtain an overall relevance score at the re-ranking stage, which can lead to sub-optimal results. Some recent Transformer-based re-rankers actually consider text passages rather than the entire document, but focus only on topical relevance. Transformers are also being used in non-IR solutions to identify information truthfulness, but just to perform a binary classification task. Therefore, in this article, we propose an IR model based on re-ranking that focuses on suitably identified text passages from documents for retrieving both topically relevant and truthful information. This approach significantly reduces the noise introduced by query-unrelated content in long documents and allows us to evaluate the document's truthfulness against it, enabling more effective retrieval. 
We tested the effectiveness of the proposed solution in the context of the Consumer Health Search task, considering publicly available datasets. Our results show that the proposed approach statistically outperforms full-text retrieval models in the context of multidimensional relevance, such as those based on aggregation, and monodimensional relevance Transformer-based re-rankers, such as BERT-based re-rankers.", isbn="978-3-031-43412-9" } @article{kucuk2020stancedetection, author = {K\"{u}\c{c}\"{u}k, Dilek and Can, Fazli}, title = {Stance Detection: A Survey}, year = {2020}, issue_date = {January 2021}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, volume = {53}, number = {1}, issn = {0360-0300}, url = {https://doi.org/10.1145/3369026}, doi = {10.1145/3369026}, abstract = {Automatic elicitation of semantic information from natural language texts is an important research problem with many practical application areas. Especially after the recent proliferation of online content through channels such as social media sites, news portals, and forums; solutions to problems such as sentiment analysis, sarcasm/controversy/veracity/rumour/fake news detection, and argument mining gained increasing impact and significance, revealed with large volumes of related scientific publications. In this article, we tackle an important problem from the same family and present a survey of stance detection in social media posts and (online) regular texts. Although stance detection is defined in different ways in different application settings, the most common definition is “automatic classification of the stance of the producer of a piece of text, towards a target, into one of these three classes: {Favor, Against, Neither}.” Our survey includes definitions of related problems and concepts, classifications of the proposed approaches so far, descriptions of the relevant datasets and tools, and related outstanding issues. 
Stance detection is a recent natural language processing topic with diverse application areas, and our survey article on this newly emerging topic will act as a significant resource for interested researchers and practitioners.}, journal = {ACM Comput. Surv.}, month = feb, articleno = {12}, numpages = {37}, keywords = {social media analysis, deep learning, Twitter, Stance detection} } @inproceedings{sun2018stancedetection, title = "Stance Detection with Hierarchical Attention Network", author = "Sun, Qingying and Wang, Zhongqing and Zhu, Qiaoming and Zhou, Guodong", editor = "Bender, Emily M. and Derczynski, Leon and Isabelle, Pierre", booktitle = "Proceedings of the 27th International Conference on Computational Linguistics", month = aug, year = "2018", address = "Santa Fe, New Mexico, USA", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/C18-1203", pages = "2399--2409", abstract = "Stance detection aims to assign a stance label (for or against) to a post toward a specific target. Recently, there is a growing interest in using neural models to detect stance of documents. Most of these works model the sequence of words to learn document representation. However, much linguistic information, such as polarity and arguments of the document, is correlated with the stance of the document, and can inspire us to explore the stance. Hence, we present a neural model to fully employ various linguistic information to construct the document representation. In addition, since the influences of different linguistic information are different, we propose a hierarchical attention network to weigh the importance of various linguistic information, and learn the mutual attention between the document and the linguistic information.
The experimental results on two datasets demonstrate the effectiveness of the proposed hierarchical attention neural model.", } @article{ALDayel2021stancedetection, title = {Stance detection on social media: State of the art and trends}, journal = {Information Processing \& Management}, volume = {58}, number = {4}, pages = {102597}, year = {2021}, issn = {0306-4573}, doi = {10.1016/j.ipm.2021.102597}, url = {https://www.sciencedirect.com/science/article/pii/S0306457321000960}, author = {Abeer ALDayel and Walid Magdy}, keywords = {Stance detection, Stance, Social media, Stance classification}, abstract = {Stance detection on social media is an emerging opinion mining paradigm for various social and political applications in which sentiment analysis may be sub-optimal. There has been a growing research interest for developing effective methods for stance detection methods varying among multiple communities including natural language processing, web science, and social computing, where each modeled stance detection in different ways. In this paper, we survey the work on stance detection across those communities and present an exhaustive review of stance detection techniques on social media, including the task definition, different types of targets in stance detection, features set used, and various machine learning approaches applied. Our survey reports state-of-the-art results on the existing benchmark datasets on stance detection, and discusses the most effective approaches. In addition, we explore the emerging trends and different applications of stance detection on social media, including opinion mining and prediction and recently using it for fake news detection.
The study concludes by discussing the gaps in the current existing research and highlights the possible future directions for stance detection on social media.} } @InProceedings{Dey2018stancedetection, author="Dey, Kuntal and Shrivastava, Ritvik and Kaushik, Saroj", editor="Pasi, Gabriella and Piwowarski, Benjamin and Azzopardi, Leif and Hanbury, Allan", title="Topical Stance Detection for Twitter: A Two-Phase LSTM Model Using Attention", booktitle="Advances in Information Retrieval", year="2018", publisher="Springer International Publishing", address="Cham", pages="529--536", abstract="The topical stance detection problem addresses detecting the stance of the text content with respect to a given topic: whether the sentiment of the given text content is in favor of (positive), is against (negative), or is none (neutral) towards the given topic. Using the concept of attention, we develop a two-phase solution. In the first phase, we classify subjectivity - whether a given tweet is neutral or subjective with respect to the given topic. In the second phase, we classify sentiment of the subjective tweets (ignoring the neutral tweets) - whether a given subjective tweet has a favor or against stance towards the topic. We propose a Long Short-Term memory (LSTM) based deep neural network for each phase, and embed attention at each of the phases. On the SemEval 2016 stance detection Twitter task dataset [7], we obtain a best-case macro F-score of 68.84{\%} and a best-case accuracy of 60.2{\%}, outperforming the existing deep learning based solutions. 
Our framework, T-PAN, is the first in the topical stance detection literature, that uses deep learning within a two-phase architecture.", isbn="978-3-319-76941-7" } @InProceedings{Wendenius2023triplet, author="Wendenius, Christof and Kuehn, Eileen and Streit, Achim", editor="Amini, Massih-Reza and Canu, St{\'e}phane and Fischer, Asja and Guns, Tias and Kralj Novak, Petra and Tsoumakas, Grigorios", title="Training Parameterized Quantum Circuits with Triplet Loss", booktitle="Machine Learning and Knowledge Discovery in Databases", year="2023", publisher="Springer Nature Switzerland", address="Cham", pages="515--530", abstract="Training parameterized quantum circuits (PQCs) is a growing research area that has received a boost from the emergence of new hybrid quantum classical algorithms and Quantum Machine Learning (QML) to leverage the power of today's quantum computers. However, a universal pipeline that guarantees good learning behavior has not yet been found, due to several challenges. These include in particular the low number of qubits and their susceptibility to noise but also the vanishing of gradients during training. In this work, we apply and evaluate Triplet Loss in a QML training pipeline utilizing a PQC for the first time. We perform extensive experiments for the Triplet Loss based setup and training on two common datasets, the MNIST and moon dataset. Without significant fine-tuning of training parameters and circuit layout, our proposed approach achieves competitive results to a regular training. Additionally, the variance and the absolute values of gradients are significantly better compared to training a PQC without Triplet Loss. The usage of metric learning proves to be suitable for QML and its high dimensional space as it is not as restrictive as learning on hard labels. 
Our results indicate that metric learning provides benefits to mitigate the so-called barren plateaus.", isbn="978-3-031-26419-1" } @misc{reimers2019sentencebert, title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, author={Nils Reimers and Iryna Gurevych}, year={2019}, eprint={1908.10084}, archivePrefix={arXiv}, primaryClass={cs.CL} } @Article{Galli2024sberts, AUTHOR = {Galli, Carlo and Donos, Nikolaos and Calciolari, Elena}, TITLE = {Performance of 4 Pre-Trained Sentence Transformer Models in the Semantic Query of a Systematic Review Dataset on Peri-Implantitis}, JOURNAL = {Information}, VOLUME = {15}, YEAR = {2024}, NUMBER = {2}, ARTICLE-NUMBER = {68}, URL = {https://www.mdpi.com/2078-2489/15/2/68}, ISSN = {2078-2489}, ABSTRACT = {Systematic reviews are cumbersome yet essential to the epistemic process of medical science. Finding significant reports, however, is a daunting task because the sheer volume of published literature makes the manual screening of databases time-consuming. The use of Artificial Intelligence could make literature processing faster and more efficient. Sentence transformers are groundbreaking algorithms that can generate rich semantic representations of text documents and allow for semantic queries. In the present report, we compared four freely available sentence transformer pre-trained models (all-MiniLM-L6-v2, all-MiniLM-L12-v2, all-mpnet-base-v2, and All-distilroberta-v1) on a convenience sample of 6110 articles from a published systematic review. The authors of this review manually screened the dataset and identified 24 target articles that addressed the Focused Questions (FQ) of the review. We applied the four sentence transformers to the dataset and, using the FQ as a query, performed a semantic similarity search on the dataset. 
The models identified similarities between the FQ and the target articles to a varying degree, and, sorting the dataset by semantic similarities using the best-performing model (all-mpnet-base-v2), the target articles could be found in the top 700 papers out of the 6110 dataset. Our data indicate that the choice of an appropriate pre-trained model could remarkably reduce the number of articles to screen and the time to completion for systematic reviews.}, DOI = {10.3390/info15020068} } @InProceedings{HaCohen2017stancedetection, author="HaCohen-kerner, Yaakov and Ido, Ziv and Ya'akobov, Ronen", editor="Altun, Yasemin and Das, Kamalika and Mielik{\"a}inen, Taneli and Malerba, Donato and Stefanowski, Jerzy and Read, Jesse and {\v{Z}}itnik, Marinka and Ceci, Michelangelo and D{\v{z}}eroski, Sa{\v{s}}o", title="Stance Classification of Tweets Using Skip Char Ngrams", booktitle="Machine Learning and Knowledge Discovery in Databases", year="2017", publisher="Springer International Publishing", address="Cham", pages="266--278", abstract="In this research, we focus on automatic supervised stance classification of tweets. Given test datasets of tweets from five various topics, we try to classify the stance of the tweet authors as either in FAVOR of the target, AGAINST it, or NONE. We apply eight variants of seven supervised machine learning methods and three filtering methods using the WEKA platform. The macro-average results obtained by our algorithm are significantly better than the state-of-art results reported by the best macro-average results achieved in the SemEval 2016 Task 6-A for all the five released datasets. 
In contrast to the competitors of the SemEval 2016 Task 6-A, who did not use any char skip ngrams but rather used thousands of ngrams and hundreds of word embedding features, our algorithm uses a few tens of features mainly character-based features where most of them are skip char ngram features.", isbn="978-3-319-71273-4" } @article{Biber1988stance, author = {Douglas Biber and Edward Finegan}, title = {Adverbial stance types in English}, journal = {Discourse Processes}, volume = {11}, number = {1}, pages = {1--34}, year = {1988}, publisher = {Routledge}, doi = {10.1080/01638538809544689}, abstract = { The present paper identifies various speech styles of English as marked by stance adverbials. By stance we mean the overt expression of an author's or speaker's attitudes, feelings, judgments, or commitment concerning the message. Adverbials are one of the primary lexical markers of stance in English, and we limit ourselves in this paper to adverbial marking of stance (the attitudinal and style disjuncts presented in Quirk, Greenbaum, Leech, \& Svartvik, 1985). All occurrences of stance adverbials are identified in the LOB and London-Lund corpora (410 texts of written and spoken British English), and each is analyzed in its sentential context to distinguish true markers of stance from adverbials that serve other functions (e.g., as manner adverbs). The adverbials marking stance are divided into six semantic categories, and the frequency of occurrence for each category in each text is computed. The six categories are labeled (1) honestly adverbials, (2) generally adverbials, (3) surely adverbials, (4) actually adverbials, (5) maybe adverbials, and (6) amazingly adverbials. Using a multivariate statistical technique called cluster analysis, texts that are maximally similar in their exploitation of these stance adverbials are grouped into clusters. 
We interpret each cluster by consideration of the frequent stance adverbials in the cluster, the situational characteristics of the texts in the cluster, and functional analyses of the stance adverbials in individual texts. Although the stance adverbials are grouped into categories on the basis of their literal meanings, the clusters are interpreted in terms of the discourse functions of the adverbials; in several cases, our analysis shows that the discourse functions of stance adverbials differ considerably from the functions suggested by their literal meanings. With respect to the adverbial marking of stance, eight styles are identified, including “Cautious,” “Secluded from Dispute,” and “Faceless.” } } @inproceedings{Li2019stance, title = "Multi-Task Stance Detection with Sentiment and Stance Lexicons", author = "Li, Yingjie and Caragea, Cornelia", editor = "Inui, Kentaro and Jiang, Jing and Ng, Vincent and Wan, Xiaojun", booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)", month = nov, year = "2019", address = "Hong Kong, China", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/D19-1657", doi = "10.18653/v1/D19-1657", pages = "6299--6305", abstract = "Stance detection aims to detect whether the opinion holder is in support of or against a given target. Recent works show improvements in stance detection by using either the attention mechanism or sentiment information. In this paper, we propose a multi-task framework that incorporates target-specific attention mechanism and at the same time takes sentiment classification as an auxiliary task. Moreover, we used a sentiment lexicon and constructed a stance lexicon to provide guidance for the attention layer. 
Experimental results show that the proposed model significantly outperforms state-of-the-art deep learning methods on the SemEval-2016 dataset.", } @article{Guo2023argumentative, title={Representing and Determining Argumentative Relevance in Online Discussions: A General Approach}, volume={17}, url={https://ojs.aaai.org/index.php/ICWSM/article/view/22146}, DOI={10.1609/icwsm.v17i1.22146}, abstractNote={Understanding an online argumentative discussion is essential for understanding users’ opinions on a topic and their underlying reasoning. A key challenge in determining completeness and persuasiveness of argumentative discussions is to assess how arguments under a topic are connected in a logical and coherent manner. Online argumentative discussions, in contrast to essays or face-to-face communication, challenge techniques for judging argument relevance because online discussions involve multiple participants and often exhibit incoherence in reasoning and inconsistencies in writing style. We define relevance as the logical and topical connections between small texts representing argument fragments in online discussions. We provide a corpus comprising pairs of sentences, labeled with argumentative relevance between the sentences in each pair. We propose a computational approach relying on content reduction and a Siamese neural network architecture for modeling argumentative connections and determining argumentative relevance between texts. Experimental results indicate that our approach is effective in measuring relevance between arguments, and outperforms strong and well-adopted baselines. 
Further analysis demonstrates the benefit of using our argumentative relevance encoding on a downstream task, predicting how impactful an online comment is to certain topic, comparing to encoding that does not consider logical connection.}, number={1}, journal={Proceedings of the International AAAI Conference on Web and Social Media}, author={Guo, Zhen and Singh, Munindar P.}, year={2023}, month=jun, pages={292--302} } @inproceedings{mikolov2013wordtovec, title = "Linguistic Regularities in Continuous Space Word Representations", author = "Mikolov, Tomas and Yih, Wen-tau and Zweig, Geoffrey", editor = "Vanderwende, Lucy and Daum{\'e} III, Hal and Kirchhoff, Katrin", booktitle = "Proceedings of the 2013 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies", month = jun, year = "2013", address = "Atlanta, Georgia", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/N13-1090", pages = "746--751", } % eprint={2103.00336}, % archivePrefix={arXiv}, % primaryClass={cs.LG} @misc{lamb2021transformers, title={Transformers with Competitive Ensembles of Independent Mechanisms}, author={Alex Lamb and Di He and Anirudh Goyal and Guolin Ke and Chien-Feng Liao and Mirco Ravanelli and Yoshua Bengio}, year={2021}, } @inproceedings{Koch2015SiameseNN, title={Siamese Neural Networks for One-Shot Image Recognition}, author={Gregory R. Koch}, year={2015}, url={https://api.semanticscholar.org/CorpusID:13874643} } @InProceedings{Hoffer2015triplet, author="Hoffer, Elad and Ailon, Nir", editor="Feragen, Aasa and Pelillo, Marcello and Loog, Marco", title="Deep Metric Learning Using Triplet Network", booktitle="Similarity-Based Pattern Recognition", year="2015", publisher="Springer International Publishing", address="Cham", pages="84--92", abstract="Deep learning has proven itself as a successful set of models for learning useful semantic representations of data. 
These, however, are mostly implicitly learned as part of a classification task. In this paper we propose the triplet network model, which aims to learn useful representations by distance comparisons. A similar model was defined by Wang et al. (2014), tailor made for learning a ranking for image information retrieval. Here we demonstrate using various datasets that our model learns a better representation than that of its immediate competitor, the Siamese network. We also discuss future possible usage as a framework for unsupervised learning.", isbn="978-3-319-24261-3" } @incollection{McCloskey1989catastrophic, title = {Catastrophic Interference in Connectionist Networks: The Sequential Learning Problem}, editor = {Gordon H. Bower}, series = {Psychology of Learning and Motivation}, publisher = {Academic Press}, volume = {24}, pages = {109--165}, year = {1989}, issn = {0079-7421}, doi = {10.1016/S0079-7421(08)60536-8}, url = {https://www.sciencedirect.com/science/article/pii/S0079742108605368}, author = {Michael McCloskey and Neal J. Cohen}, abstract = {Publisher Summary Connectionist networks in which information is stored in weights on connections among simple processing units have attracted considerable interest in cognitive science. Much of the interest centers around two characteristics of these networks. First, the weights on connections between units need not be prewired by the model builder but rather may be established through training in which items to be learned are presented repeatedly to the network and the connection weights are adjusted in small increments according to a learning algorithm. Second, the networks may represent information in a distributed fashion. This chapter discusses the catastrophic interference in connectionist networks. Distributed representations established through the application of learning algorithms have several properties that are claimed to be desirable from the standpoint of modeling human cognition. 
These properties include content-addressable memory and so-called automatic generalization in which a network trained on a set of items responds correctly to other untrained items within the same domain. New learning may interfere catastrophically with old learning when networks are trained sequentially. The analysis of the causes of interference implies that at least some interference will occur whenever new learning may alter weights involved in representing old learning, and the simulation results demonstrate only that interference is catastrophic in some specific networks.} } @inproceedings{Vahtola2022negation, title = "It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New {S}em{A}nto{N}eg Benchmark", author = {Vahtola, Teemu and Creutz, Mathias and Tiedemann, J{\"o}rg}, editor = "Bastings, Jasmijn and Belinkov, Yonatan and Elazar, Yanai and Hupkes, Dieuwke and Saphra, Naomi and Wiegreffe, Sarah", booktitle = "Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP", month = dec, year = "2022", address = "Abu Dhabi, United Arab Emirates (Hybrid)", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2022.blackboxnlp-1.20", doi = "10.18653/v1/2022.blackboxnlp-1.20", pages = "249--262", abstract = "We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. 
We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as {``}I am not guilty{''} versus {``}I am innocent{''}. We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task.", } @inproceedings{Qin2023LLM, title = "Is {C}hat{GPT} a General-Purpose Natural Language Processing Task Solver?", author = "Qin, Chengwei and Zhang, Aston and Zhang, Zhuosheng and Chen, Jiaao and Yasunaga, Michihiro and Yang, Diyi", editor = "Bouamor, Houda and Pino, Juan and Bali, Kalika", booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing", month = dec, year = "2023", address = "Singapore", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2023.emnlp-main.85", doi = "10.18653/v1/2023.emnlp-main.85", pages = "1339--1384", abstract = "Spurred by advancements in scale, large language models (LLMs) have demonstrated the ability to perform a variety of natural language processing (NLP) tasks zero-shot{---}i.e., without adaptation on downstream data. Recently, the debut of ChatGPT has drawn a great deal of attention from the natural language processing (NLP) community due to the fact that it can generate high-quality responses to human input and self-correct previous mistakes based on subsequent conversations. 
However, it is not yet known whether ChatGPT can serve as a generalist model that can perform many NLP tasks zero-shot. In this work, we empirically analyze the zero-shot learning ability of ChatGPT by evaluating it on 20 popular NLP datasets covering 7 representative task categories. With extensive empirical studies, we demonstrate both the effectiveness and limitations of the current version of ChatGPT. We find that ChatGPT performs well on many tasks favoring reasoning capabilities (e.g., arithmetic reasoning) while it still faces challenges when solving specific tasks such as sequence tagging. We additionally provide in-depth analysis through qualitative case studies.", } @inproceedings{ghafouri2024stance, title = "{I} love pineapple on pizza != {I} hate pineapple on pizza: Stance-Aware Sentence Transformers for Opinion Mining", author = "Ghafouri, Vahid and Such, Jose and Suarez-Tangil, Guillermo", editor = "Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung", booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing", month = nov, year = "2024", address = "Miami, Florida, USA", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2024.emnlp-main.1171", pages = "21046--21058", abstract = "Sentence transformers excel at grouping topically similar texts, but struggle to differentiate opposing viewpoints on the same topic. This shortcoming hinders their utility in applications where understanding nuanced differences in opinion is essential, such as those related to social and political discourse analysis. This paper addresses this issue by fine-tuning sentence transformers with arguments for and against human-generated controversial claims. We demonstrate how our fine-tuned model enhances the utility of sentence transformers for social computing tasks such as opinion mining and stance detection. 
We elaborate that applying stance-aware sentence transformers to opinion mining is a more computationally efficient and robust approach in comparison to the classic classification-based approaches.", } @article{singh2024discord, title={Differences in the Toxic Language of Cross-Platform Communities}, volume={18}, url={https://ojs.aaai.org/index.php/ICWSM/article/view/31402}, DOI={10.1609/icwsm.v18i1.31402}, abstractNote={Cross-platform communities are social media communities that have a presence on multiple online platforms. One active community on both Reddit and Discord is dankmemes. Our study aims to examine differences in harmful language usage across different platforms in a community. We scrape 15 communities that are active on both Reddit and Discord. We then identify and compare differences in type and level of toxicity, in the topics of the harmful discourse, in the temporal evolution of toxicity and its attribution to users, and in the moderation strategies communities across platforms. Our results show that most communities exhibit differences in toxicity depending on the platform. We see that toxicity is rooted in the different subcultures as well as in the way in which the platforms operate and their administrators moderate content. However, we note that in general terms Discord is significantly more toxic than Reddit. We offer a detailed analysis of the topics and types of communities in which this happens and why, which will help moderators and policymakers shape their strategies to mitigate the harm on the Web. 
In particular, we propose practical and effective strategies that Discord can implement to improve its platform moderation.}, number={1}, journal={Proceedings of the International AAAI Conference on Web and Social Media}, author={Singh, Ashwini Kumar and Ghafouri, Vahid and Such, Jose and Suarez-Tangil, Guillermo}, year={2024}, month=may, pages={1463--1476} } @misc{ghafouri2024sexism, title={A Holistic Indicator of Polarization to Measure Online Sexism}, author={Vahid Ghafouri and Jose Such and Guillermo Suarez-Tangil}, year={2024}, eprint={2404.02205}, archivePrefix={arXiv}, primaryClass={cs.SI}, url={https://arxiv.org/abs/2404.02205}, } @misc{mikolov2013embeddings, title={Exploiting Similarities among Languages for Machine Translation}, author={Tomas Mikolov and Quoc V. Le and Ilya Sutskever}, year={2013}, eprint={1309.4168}, archivePrefix={arXiv}, primaryClass={cs.CL}, url={https://arxiv.org/abs/1309.4168}, } @misc{jimenez2021uncovering, title={Uncovering Correlations Between Two UMAP Hyperparameters and the Input Dataset}, author={Jimenez Villalonga, Federico}, year={2021} } @article{zhang2022neural, title={Is Neural Topic Modelling Better than Clustering? 
An Empirical Study on Clustering with Contextual Embeddings for Topics}, author={Zhang, Zihan and Fang, Meng and Chen, Ling and Namazi-Rad, Mohammad-Reza}, journal={arXiv preprint arXiv:2204.09874}, year={2022} } @article{erdem2021animalRecognitionWithSiameseNetworksAndMeanEmbeddings, title = "Animal recognition with Siamese Networks and Mean Embeddings", author = "Kemal Erdem", journal = "https://erdem.pl", year = "2021", month = feb, url = "https://erdem.pl/2021/02/animal-recognition-with-siamese-networks-and-mean-embeddings" } @misc{tripletloss2023, title = {Triplet Loss with Keras and TensorFlow}, url = {https://pyimagesearch.com/2023/03/06/triplet-loss-with-keras-and-tensorflow/}, author = {Shivam Chandhok}, year = {2023}, month = may } @inproceedings{zhang2024solving, title = "Solving General Natural-Language-Description Optimization Problems with Large Language Models", author = "Zhang, Jihai and Wang, Wei and Guo, Siyan and Wang, Li and Lin, Fangquan and Yang, Cheng and Yin, Wotao", editor = "Yang, Yi and Davani, Aida and Sil, Avi and Kumar, Anoop", booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)", month = jun, year = "2024", address = "Mexico City, Mexico", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2024.naacl-industry.42", doi = "10.18653/v1/2024.naacl-industry.42", pages = "483--490", abstract = "Optimization problems seek to find the best solution to an objective under a set of constraints, and have been widely investigated in real-world applications. Modeling and solving optimization problems in a specific domain typically require a combination of domain knowledge, mathematical skills, and programming ability, making it difficult for general users and even domain professionals. 
In this paper, we propose a novel framework called OptLLM that augments LLMs with external solvers. Specifically, OptLLM accepts user queries in natural language, convert them into mathematical formulations and programming codes, and calls the solvers to calculate the results for decision-making. In addition, OptLLM supports multi-round dialogues to gradually refine the modeling and solving of optimization problems. To illustrate the effectiveness of OptLLM, we provide tutorials on three typical optimization applications and conduct experiments on both prompt-based GPT models and a fine-tuned Qwen model using a large-scale self-developed optimization dataset. Experimental results show that OptLLM works with various LLMs, and the fine-tuned model achieves an accuracy boost compared to the prompt-based models. Some features of OptLLM framework have been available for trial since June 2023 (https://opt.alibabacloud.com/chat or https://opt.aliyun.com/chat).", } @inproceedings{Semaan2014depolarization, title={Social media supporting political deliberation across multiple public spheres: towards depolarization}, author={Bryan C. Semaan and Scott P. Robertson and Sara K. Douglas and Misa Maruyama}, booktitle={Proceedings of the 17th ACM conference on Computer supported cooperative work and social computing}, year={2014}, doi={10.1145/2531602.2531605} } @article{Saveski2022depolarizationempirical, title={Engaging Politically Diverse Audiences on Social Media}, volume={16}, url={https://ojs.aaai.org/index.php/ICWSM/article/view/19342}, DOI={10.1609/icwsm.v16i1.19342}, abstractNote={We study how political polarization is reflected in the social media posts used by media outlets to promote their content online. 
In particular, we track the Twitter posts of several media outlets over the course of more than three years (566K tweets), and the engagement with these tweets from other users (104M retweets), modeling the relationship between the tweet text and the political diversity of the audience. We build a tool that integrates our model and helps journalists craft tweets that are engaging to a politically diverse audience, guided by the model predictions. To test the real-world impact of the tool, we partner with the PBS documentary series Frontline and run a series of advertising experiments on Twitter. We find that in seven out of the ten experiments, the tweets selected by our model were indeed engaging to a more politically diverse audience, reducing the gap in engagement between left- and right-leaning users by 20.3%, on average, and illustrating the effectiveness of our approach.}, number={1}, journal={Proceedings of the International AAAI Conference on Web and Social Media}, author={Saveski, Martin and Beeferman, Doug and McClure, David and Roy, Deb}, year={2022}, month=may, pages={873--884} } @article{Kinoshita2022radicalization, title={A Spectral-Based Model for Describing Social Polarization in Online Communities}, author={Tomoya Kinoshita and Masaki Aida}, journal={IEICE Trans. Commun.}, year={2022}, volume={105-B}, pages={1181--1191}, doi={10.1587/transcom.2021mep0001} } @article{Borrelli2022counter, title={A Quantitative and Content-Based Approach for Evaluating the Impact of Counter Narratives on Affective Polarization in Online Discussions}, author={Dario Borrelli and L. Iandoli and J. Ramírez-Márquez and Carlo Lipizzi}, journal={IEEE Transactions on Computational Social Systems}, year={2022}, volume={9}, pages={914--925}, doi={10.1109/tcss.2021.3094775} } @article{Pal2022Depolarization, title={Depolarization of opinions on social networks through random nudges}, author={Ritam Pal and Aanjaneya Kumar and M. S. Santhanam}, journal={Physical Review
E}, year={2022}, volume={108}, number={3}, pages={034307}, doi={10.1103/physreve.108.034307} } @inproceedings{hasanaath2024stancecrafters, title = "{S}tance{C}rafters at {S}tance{E}val2024: Multi-task Stance Detection using {BERT} Ensemble with Attention Based Aggregation", author = "Hasanaath, Ahmed and Alansari, Aisha", editor = "Habash, Nizar and Bouamor, Houda and Eskander, Ramy and Tomeh, Nadi and Abu Farha, Ibrahim and Abdelali, Ahmed and Touileb, Samia and Hamed, Injy and Onaizan, Yaser and Alhafni, Bashar and Antoun, Wissam and Khalifa, Salam and Haddad, Hatem and Zitouni, Imed and AlKhamissi, Badr and Almatham, Rawan and Mrini, Khalil", booktitle = "Proceedings of The Second Arabic Natural Language Processing Conference", month = aug, year = "2024", address = "Bangkok, Thailand", publisher = "Association for Computational Linguistics", url = "https://aclanthology.org/2024.arabicnlp-1.94", doi = "10.18653/v1/2024.arabicnlp-1.94", pages = "811--815", abstract = "Stance detection is a key NLP problem that classifies a writer{'}s viewpoint on a topic based on their writing. This paper outlines our approach for Stance Detection in Arabic Language Shared Task (StanceEval2024), focusing on attitudes towards the COVID-19 vaccine, digital transformation, and women{'}s empowerment. The proposed model uses parallel multi-task learning with two fine-tuned BERT-based models combined via an attention module. Results indicate this ensemble outperforms a single BERT model, demonstrating the benefits of using BERT architectures trained on diverse datasets. Specifically, Arabert-Twitterv2, trained on tweets, and Camel-Lab, trained on Modern Standard Arabic (MSA), Dialectal Arabic (DA), and Classical Arabic (CA), allowed us to leverage diverse Arabic dialects and styles.", } | es |