SMILE

Stochastic Models for the Inference of Life Evolution

BibTeX

% Lambert (2009): journal article on mutation patterns (polymorphic sites and
% haplotypes) in samples drawn from coalescent point processes.
% The journal name is stored in full; let the bibliography style abbreviate it.
@article{lambert_allelic_2009,
  author   = {Lambert, Amaury},
  title    = {The allelic partition for coalescent point processes},
  journal  = {Markov Processes and Related Fields},
  volume   = {15},
  pages    = {359--386},
  year     = {2009},
  abstract = {Assume that individuals alive at time t in some
              population can be ranked in such a way that the
              coalescence times between consecutive individuals are
              i.i.d. The ranked sequence of these branches is called
              a coalescent point process. We have shown in a previous
              work [14] that splitting trees are important instances
              of such populations. Here, individuals are given DNA
              sequences, and for a sample of n DNA sequences
              belonging to distinct individuals, we consider the
              number Sn of polymorphic sites (sites at which at least
              two sequences differ), and the number An of distinct
              haplotypes (sequences differing at one site at least).
              It is standard to assume that mutations arrive at
              constant rate (on germ lines), and never hit the same
              site on the DNA sequence. We study the mutation pattern
              associated with coalescent point processes under this
              assumption. Here, Sn and An grow linearly as n grows,
              with explicit rate. However, when the branch lengths
              have infinite expectation, Sn grows more rapidly, e.g.
              as n ln(n) for critical birth-death processes. Then, we
              study the frequency spectrum of the sample, that is,
              the numbers of polymorphic sites/haplotypes carried by
              k individuals in the sample. These numbers are shown to
              grow also linearly with sample size, and we provide
              simple explicit formulae for mutation frequencies and
              haplotype frequencies. For critical birth-death
              processes, mutation frequencies are given by the
              harmonic series and haplotype frequencies by Fisher's
              logarithmic series.},
}