dorsal/arxiv
View SchemaExact Asymptotic Results for a Model of Sequence Alignment
| Authors | Satya N. Majumdar, Sergei Nechaev |
|---|---|
| Categories | |
| ArXiv ID | q-bio/0410012 |
| URL | https://arxiv.org/abs/q-bio/0410012 |
| DOI | 10.1103/PhysRevE.72.020901 |
| Journal | Phys. Rev. E, 72, 020901 (2005) |
Abstract
Finding analytically the statistics of the longest common subsequence (LCS) of a pair of random sequences drawn from c alphabets is a challenging problem in computational evolutionary biology. We present exact asymptotic results for the distribution of the LCS in a simpler, yet nontrivial, variant of the original model called the Bernoulli matching (BM) model which reduces to the original model in the large c limit. We show that in the BM model, for all c, the distribution of the asymptotic length of the LCS, suitably scaled, is identical to the Tracy-Widom distribution of the largest eigenvalue of a random matrix whose entries are drawn from a Gaussian unitary ensemble. In particular, in the large c limit, this provides an exact expression for the asymptotic length distribution in the original LCS problem.
{
"annotation_id": "75f3a5cd-80ef-42ec-9e14-a97132566b9d",
"date_created": "2026-03-02T18:01:31.488000Z",
"date_modified": "2026-03-02T18:01:31.488000Z",
"file_hash": "7b1361fe327b91fddb6825b06e633a8368d71ad32b1387a1cbed9b11c6348d9b",
"private": false,
"record": {
"abstract": "Finding analytically the statistics of the longest common subsequence (LCS)\nof a pair of random sequences drawn from c alphabets is a challenging problem\nin computational evolutionary biology. We present exact asymptotic results for\nthe distribution of the LCS in a simpler, yet nontrivial, variant of the\noriginal model called the Bernoulli matching (BM) model which reduces to the\noriginal model in the large c limit. We show that in the BM model, for all c,\nthe distribution of the asymptotic length of the LCS, suitably scaled, is\nidentical to the Tracy-Widom distribution of the largest eigenvalue of a random\nmatrix whose entries are drawn from a Gaussian unitary ensemble. In particular,\nin the large c limit, this provides an exact expression for the asymptotic\nlength distribution in the original LCS problem.",
"arxiv_id": "q-bio/0410012",
"authors": [
"Satya N. Majumdar",
"Sergei Nechaev"
],
"categories": [
"q-bio.GN",
"cond-mat.stat-mech",
"math.ST",
"stat.TH"
],
"doi": "10.1103/PhysRevE.72.020901",
"journal_ref": "Phys. Rev. E, 72, 020901 (2005)",
"title": "Exact Asymptotic Results for a Model of Sequence Alignment",
"url": "https://arxiv.org/abs/q-bio/0410012"
},
"schema_id": "dorsal/arxiv",
"source": {
"execution_id": "51c76362-f325-40cc-a7af-3755df8a0c6e",
"id": "arXiv Dataset IDs",
"type": "Model",
"variant": "snapshot-2026-03-01",
"version": "0.1.0"
},
"user_id": 1000002
}