@misc{ibm_patent,
  author       = {Agrawal, Bhavna and Khabiri, Elham and Li, Yingjie and Sankhe, Pranav Girish},
  title        = {Generating Unique Word Embeddings for Jargon-Specific Tabular Data for Neural Network Training and Usage},
  howpublished = {U.S. Patent 17,483,989},
  year         = {2023},
  note         = {March 30, 2023},
  url          = {https://scholar.google.com/citations?view_op=view_citation&hl=en&user=YUlF75QAAAAJ&citation_for_view=YUlF75QAAAAJ:2osOgNQ5qMEC},
}
2022
Mutual Information Scoring: Increasing Interpretability In Categorical Clustering Tasks With Applications To Child Welfare Data
Pranav Sankhe, Seventy F. Hall, Melanie Sage, Maria Y. Rodriguez, Varun Chandola, and Kenneth Joseph
In Social, Cultural, and Behavioral Modeling: 15th International Conference, SBP-BRiMS 2022, Pittsburgh, PA, USA, September 20–23, 2022, Proceedings, 2022
Youth in the American foster care system are significantly more likely than their peers to face a number of negative life outcomes, from homelessness to incarceration. Administrative data on these youth have the potential to provide insights that can help identify ways to improve their path towards a better life. However, such data also suffer from a variety of biases, from missing data to reflections of systemic inequality. The present work proposes a novel, prescriptive approach to using these data to provide insights about both data biases and the systems and youth they track. Specifically, we develop a novel categorical clustering and cluster summarization methodology that allows us to gain insights into subtle biases in existing data on foster youth, and to provide insight into where further (often qualitative) research is needed to identify potential ways of assisting youth.
@inproceedings{10.1007/978-3-031-17114-7_16,
  author    = {Sankhe, Pranav and Hall, Seventy F. and Sage, Melanie and Rodriguez, Maria Y. and Chandola, Varun and Joseph, Kenneth},
  title     = {Mutual Information Scoring: Increasing Interpretability In Categorical Clustering Tasks With Applications To Child Welfare Data},
  booktitle = {Social, Cultural, and Behavioral Modeling: 15th International Conference, {SBP-BRiMS} 2022, Pittsburgh, PA, USA, September 20--23, 2022, Proceedings},
  pages     = {165--175},
  numpages  = {11},
  year      = {2022},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  location  = {Pittsburgh, PA, USA},
  isbn      = {978-3-031-17113-0},
  doi       = {10.1007/978-3-031-17114-7_16},
  url       = {https://doi.org/10.1007/978-3-031-17114-7_16},
  keywords  = {Categorical data, Clustering, Foster care youth},
}
A qualitative, network-centric method for modeling socio-technical systems, with applications to evaluating interventions on social media platforms to increase social equality
Kenneth Joseph, Winnie Chen, Stefania Ionescu, Yuhao Du, Pranav Sankhe, Aniko Hannak, and Atri Rudra
We propose and extend a qualitative, complex systems methodology from cognitive engineering, known as the abstraction hierarchy, to model how potential interventions that could be carried out by social media platforms might impact social equality. Social media platforms have come under considerable ire for their role in perpetuating social inequality. However, there is also significant evidence that platforms can play a role in reducing social inequality, e.g. through the promotion of social movements. Platforms’ role in producing or reducing social inequality is, moreover, not static; platforms can and often do take actions targeted at positive change. How can we develop tools to help us determine whether or not a potential platform change might actually work to increase social equality? Here, we present the abstraction hierarchy as a tool to help answer this question. Our primary contributions are two-fold. First, methodologically, we extend existing research on the abstraction hierarchy in cognitive engineering with principles from Network Science. Second, substantively, we illustrate the utility of this approach by using it to assess the potential effectiveness of a set of interventions, proposed in prior work, for how online dating websites can help mitigate social inequality.
@article{network,
  author  = {Joseph, Kenneth and Chen, Winnie and Ionescu, Stefania and Du, Yuhao and Sankhe, Pranav and Hannak, Aniko and Rudra, Atri},
  title   = {A qualitative, network-centric method for modeling socio-technical systems, with applications to evaluating interventions on social media platforms to increase social equality},
  journal = {Applied Network Science},
  volume  = {7},
  month   = jul,
  year    = {2022},
  doi     = {10.1007/s41109-022-00486-8},
}
2021
TableNN: Deep Learning Framework for Learning Domain Specific Tabular Data
P. Sankhe, E. Khabiri, B. Agrawal, and Y. Li
In 2021 IEEE International Conference on Big Data (Big Data), Dec 2021
Enterprises often have a large number of databases and other sources of tabular data with columns full of domain-specific jargon (e.g. alpha-numeric codings, undeclared abbreviations, etc.) which usually require domain experts to decode. Due to the jargon-specific content of the tables, no pre-trained language model such as Wiki2Vec [21] can be applied readily to encode the cell semantics due to the absence of unique jargon words or alpha-numeric codes in the model vocabulary. We propose a deep learning based framework that is ideally suited for serverless computing environments, and that 1) uses a new tokenization method, called Cell-Masking, 2) encodes the semantics of the cells into contextual embedding that exploits the locality features in tabular data, called Cell2Vec, and 3) an attention-based neural network, called TableNN, that provides a supervised learning solution to classify cell entries into predefined column classes. We apply the proposed method on three publicly available datasets of varying data sizes, from different industries. Cell-Masking provides an order of magnitude lower loss value and quickest convergence for cell embedding generation. In Cell2Vec, we demonstrate that the inclusion of row and column context improves the quality of embeddings by better loss curve convergence and improvement in accuracy by 5.4% on the BTS dataset.
@inproceedings{tablenn,
  author    = {Sankhe, Pranav and Khabiri, Elham and Agrawal, Bhavna and Li, Yingjie},
  title     = {{TableNN}: Deep Learning Framework for Learning Domain Specific Tabular Data},
  booktitle = {2021 {IEEE} International Conference on Big Data (Big Data)},
  pages     = {4097--4102},
  month     = dec,
  year      = {2021},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  doi       = {10.1109/BigData52589.2021.9671972},
  url       = {https://doi.ieeecomputersociety.org/10.1109/BigData52589.2021.9671972},
  keywords  = {deep learning;vocabulary;computational modeling;semantics;supervised learning;neural networks;serverless computing},
}
2019
Fast 3D Hand Pose Estimation Using Dynamic Graph NN
P. Sankhe, Y. Junsong, F. Chen, and S. Xiaohu
Unpublished Manuscript, ByteDance Research, Dec 2019