@article{unic14342,
  author   = {Seher Ansar Khawaja and Muhammad Shoaib Farooq and Kashif Ishaq and Najah Alsubaie and Hanen Karamti and Elizabeth Caro Montero and Eduardo Ren{\'e} Silva Alvarado and Imran Ashraf},
  title    = {Prediction of leukemia peptides using convolutional neural network and protein compositions},
  journal  = {BMC Cancer},
  volume   = {24},
  number   = {1},
  month    = jul,
  year     = {2024},
  url      = {http://repositorio.unic.co.ao/id/eprint/14342/},
  keywords = {Leukemia detection; Protein sequences; Deep learning; Convolutional neural network},
  abstract = {Leukemia is a type of blood cell cancer that is in the bone marrow's blood-forming cells. Two types of Leukemia are acute and chronic; acute enhances fast and chronic growth gradually which are further classified into lymphocytic and myeloid leukemias. This work evaluates a unique deep convolutional neural network (CNN) classifier that improves identification precision by carefully examining concatenated peptide patterns. The study uses leukemia protein expression for experiments supporting two different techniques including independence and applied cross-validation. In addition to CNN, multilayer perceptron (MLP), gated recurrent unit (GRU), and recurrent neural network (RNN) are applied. The experimental results show that the CNN model surpasses competitors with its outstanding predictability in independent and cross-validation testing applied on different features extracted from protein expressions such as amino acid composition (AAC) with a group of AAC (GAAC), tripeptide composition (TPC) with a group of TPC (GTPC), and dipeptide composition (DPC) for calculating its accuracies with their receiver operating characteristic (ROC) curve. In independence testing, a feature expression of AAC and a group of GAAC are applied using MLP and CNN modules, and ROC curves are achieved with overall 100\% accuracy for the detection of protein patterns.
In cross-validation testing, a feature expression on a group of AAC and GAAC patterns achieved 98.33\% accuracy which is the highest for the CNN module. Furthermore, ROC curves show a 0.965\% extraordinary result for the GRU module. The findings show that the CNN model is excellent at figuring out leukemia illnesses from protein expressions with higher accuracy.},
}