diff --git a/_images/gaussian_IQ.png b/_images/gaussian_IQ.png new file mode 100644 index 00000000..74147280 Binary files /dev/null and b/_images/gaussian_IQ.png differ diff --git a/_images/gaussian_histogram.png b/_images/gaussian_histogram.png new file mode 100644 index 00000000..9544a26f Binary files /dev/null and b/_images/gaussian_histogram.png differ diff --git a/_images/gaussian_transformed.png b/_images/gaussian_transformed.png new file mode 100644 index 00000000..3b56ceb4 Binary files /dev/null and b/_images/gaussian_transformed.png differ diff --git a/conf.py b/conf.py index 0df64d0f..7c2c60ff 100644 --- a/conf.py +++ b/conf.py @@ -22,6 +22,7 @@ 'sphinx.ext.mathjax', 'sphinx.ext.autosectionlabel', #'sphinxcontrib.tikz', #added for dutch + "sphinxcontrib.mermaid" ] mathjax_path = "mathjax/tex-mml-chtml.js" # so that the textbook can work offline diff --git a/content/detection.rst b/content/detection.rst index 9c12cea6..5a3b7021 100644 --- a/content/detection.rst +++ b/content/detection.rst @@ -433,43 +433,21 @@ To operate in real-time, we will accumulate samples in **buffers** (chunks of, s Implementation ############## - Our detector will follow this workflow: -.. code-block:: text - - ┌─────────────────────────────────────────────────────────────┐ - │ Continuous IQ Stream from SDR (e.g., 1 MHz sample rate) │ - └────────────────────┬────────────────────────────────────────┘ - │ - ▼ - ┌─────────────────────────────────────────────────────────────┐ - │ Buffer Accumulation (e.g., 100k samples = 0.1 sec) │ - └────────────────────┬────────────────────────────────────────┘ - │ - ▼ - ┌─────────────────────────────────────────────────────────────┐ - │ Cross-Correlation with Known Preamble │ - │ → Produces correlation vs. 
sample index │ - └────────────────────┬────────────────────────────────────────┘ - │ - ▼ - ┌─────────────────────────────────────────────────────────────┐ - │ CFAR Threshold Computation │ - │ → Adaptive threshold that tracks noise floor │ - └────────────────────┬────────────────────────────────────────┘ - │ - ▼ - ┌─────────────────────────────────────────────────────────────┐ - │ Peak Detection (correlation > threshold) │ - │ → List of candidate packet start indices │ - └────────────────────┬────────────────────────────────────────┘ - │ - ▼ - ┌─────────────────────────────────────────────────────────────┐ - │ Packet Extraction & Validation │ - │ → Extract samples, pass to demodulator │ - └─────────────────────────────────────────────────────────────┘ +.. mermaid:: + + flowchart TD + + A("Continuous IQ Stream from SDR
(1 MHz sample rate)") + B("Buffer Accumulation
(100k samples = 0.1 sec)") + C("Cross-Correlation with Known Preamble") + D("CFAR Threshold Computation") + E("Peak Detection
(correlation > threshold)") + F("Packet Extraction & Validation") + + A --> B --> C --> D --> E --> F + To avoid missing packets that straddle buffer boundaries, we use an **overlap-save** approach, where each buffer includes the last ``N_preamble`` samples from the previous buffer. This ensures any packet starting near the end of buffer ``i`` will be fully contained in buffer ``i+1``. This requires a small additional computational overhead but we don't want to miss packets just because they straddle buffer boundaries. diff --git a/content/random_variables.rst b/content/random_variables.rst new file mode 100644 index 00000000..0940ef23 --- /dev/null +++ b/content/random_variables.rst @@ -0,0 +1,362 @@ +.. _random-variables-chapter: + +###################################### +Random Variables and Random Processes +###################################### + +In this chapter we introduce the fundamental concepts of random variables and random processes, which are essential for understanding noise, channel effects, and many signal processing techniques in wireless communications. We'll cover probability distributions, expectation, variance, and how random processes evolve over time. These concepts form the mathematical foundation for analyzing noise in the :ref:`noise-chapter` chapter and many other topics throughout SDR and DSP. + +*************************** +What is a Random Variable? +*************************** + +A **random variable** is a mathematical concept that maps outcomes of a random experiment to numerical values. Unlike the deterministic signals we've worked with so far, random variables represent quantities whose values are uncertain until they are observed or measured. + +Think of rolling a six-sided die. Before you roll it, you don't know what number will appear. We can define a random variable :math:`X` that represents the outcome of the roll. 
The value of :math:`X` is one of {1, 2, 3, 4, 5, 6}, but we don't know which one until we actually roll the die. + +In the context of wireless communications and SDR, random variables are everywhere: + +* The thermal noise in a receiver is modeled as a random variable at each instant in time +* The amplitude of a received signal affected by fading is random +* The phase offset introduced by a channel can be modeled as random +* Even the data bits we transmit can be treated as random variables (if we don't know them ahead of time) + +**Single Sample vs. Many Samples** + +This is a crucial distinction that often causes confusion: + +* A **single realization** or **single sample** of a random variable is just one number—one outcome of the random experiment +* To characterize a random variable (find its average, spread, etc.), we need **many realizations**—many outcomes + +For example, if you call ``np.random.randn()`` in Python without any arguments, it returns a single random number drawn from a Gaussian distribution. That single number tells you almost nothing about the distribution itself. But if you call ``np.random.randn(10000)`` and generate 10,000 samples, you can now estimate properties of the distribution like its mean and variance. + +.. code-block:: python + + import numpy as np + + # Single sample - just one number + x_single = np.random.randn() + print(x_single) # might be 0.534, -1.23, or any other value + + # Many samples - now we can characterize the distribution + x_many = np.random.randn(10000) + print(np.mean(x_many)) # will be close to 0 + print(np.var(x_many)) # will be close to 1 + +Joint Distributions +#################### + +So far we've focused on single random variables. When dealing with two or more random variables simultaneously, we use a **joint distribution**. + +For continuous variables :math:`X` and :math:`Y`, this is described by the **joint PDF**: + +.. 
math:: + f_{X,Y}(x,y) + +The joint PDF tells us how likely it is for :math:`X` to take value :math:`x` *and* :math:`Y` to take value :math:`y` at the same time. + +From the joint PDF, we can compute: + +* Marginal PDFs (e.g., :math:`f_X(x)` or :math:`f_Y(y)`) +* Expectations such as :math:`E[XY]` +* Covariance and correlation +* Probabilities involving both variables + +For example, the marginal PDF of :math:`X` is obtained by integrating out :math:`Y`: + +.. math:: + f_X(x) = \int_{-\infty}^{\infty} f_{X,Y}(x,y)\,dy + +Joint distributions are the mathematical foundation for understanding dependence, correlation, and independence between random variables. + +*************************** +Probability Distributions +*************************** + +A **probability distribution** describes how likely different values of a random variable are. For a continuous random variable, we use a **probability density function (PDF)**, denoted :math:`f_X(x)`. The PDF tells us the relative likelihood of the random variable taking on different values. + +The most important distribution in SDR and communications is the **Gaussian (Normal) distribution**. A Gaussian random variable :math:`X` with mean :math:`\mu` and variance :math:`\sigma^2` has the PDF: + +.. math:: + f_X(x) = \frac{1}{\sqrt{2\pi\sigma^2}} e^{-\frac{(x-\mu)^2}{2\sigma^2}} + +This is the famous "bell curve" you've likely seen before. The distribution is completely characterized by two parameters: + +* **Mean** :math:`\mu`: the center of the distribution +* **Variance** :math:`\sigma^2`: how spread out the distribution is (standard deviation :math:`\sigma` is the square root of variance) + +In Python, ``np.random.randn()`` generates samples from a **standard Gaussian** distribution with :math:`\mu = 0` and :math:`\sigma^2 = 1`. We can visualize this: + +.. 
code-block:: python + + import numpy as np + import matplotlib.pyplot as plt + + # Generate 10,000 samples from standard Gaussian + x = np.random.randn(10000) + + # Create histogram to visualize the distribution + plt.hist(x, bins=50, density=True, alpha=0.7, edgecolor='black') + plt.xlabel('Value') + plt.ylabel('Probability Density') + plt.title('Gaussian Distribution (μ=0, σ²=1)') + plt.grid(True) + plt.show() + +.. image:: ../_images/gaussian_histogram.png + :scale: 80% + :align: center + :alt: Histogram of Gaussian distributed samples + +Expectation (Mean) +####################### + +The **expectation** or **expected value** of a random variable, denoted :math:`E[X]` or :math:`\mu`, represents its average value over many realizations. For a continuous random variable with PDF :math:`f_X(x)`, the expectation is: + +.. math:: + E[X] = \int_{-\infty}^{\infty} x \cdot f_X(x) \, dx + +In practice, when we have :math:`N` samples :math:`x_1, x_2, \ldots, x_N` drawn from the distribution, we estimate the expectation using the **sample mean**: + +.. math:: + \hat{\mu} = \frac{1}{N} \sum_{n=1}^{N} x_n + +The expectation is a **linear operator**, which means: + +* :math:`E[aX + b] = aE[X] + b` for constants :math:`a` and :math:`b` +* :math:`E[X + Y] = E[X] + E[Y]` for any two random variables + +This linearity is extremely useful in signal processing! + +Variance and Standard Deviation +################################# + +The **variance** of a random variable, denoted :math:`\text{Var}(X)` or :math:`\sigma^2`, measures how spread out its values are around the mean. It's defined as the expected value of the squared deviation from the mean: + +.. math:: + \text{Var}(X) = E[(X - \mu)^2] = E[X^2] - (E[X])^2 + +When we have :math:`N` samples, we estimate variance using: + +.. math:: + \hat{\sigma}^2 = \frac{1}{N} \sum_{n=1}^{N} (x_n - \hat{\mu})^2 + +The **standard deviation** :math:`\sigma` is simply the square root of variance: :math:`\sigma = \sqrt{\sigma^2}`. 
*Notice the hat (^) symbol over* :math:`\sigma` *in the equation above, and over the sample mean earlier. The hat indicates that we are estimating the mean/variance: the estimate is not always exactly equal to the true value, but it gets closer as we increase the number of samples.*
math:: + \text{Cov}(X,Y) = E[XY] - E[X]E[Y] + +Covariance measures how two variables vary together: + +* Positive covariance: they tend to increase and decrease together +* Negative covariance: one tends to increase when the other decreases +* Zero covariance: they are uncorrelated + +If both variables are zero-mean, this simplifies to: + +.. math:: + \text{Cov}(X,Y) = E[XY] + +Covariance has units (it is not normalized), which is why we often use the **correlation coefficient** (or simply correlation) in practice: + +.. math:: + \rho_{XY} = \frac{\text{Cov}(X,Y)}{\sigma_X \sigma_Y} + +This produces a dimensionless value between −1 and +1. + +Variance of a Sum of Variables +############################### + +In signal processing we often deal with sums of random variables, such as a signal plus noise: + +.. math:: + Z = X + Y + +The variance of this sum depends on whether :math:`X` and :math:`Y` are independent (or more generally, correlated). + +In full generality: + +.. math:: + \text{Var}(X + Y) = \text{Var}(X) + \text{Var}(Y) + 2\,\text{Cov}(X,Y) + +where :math:`\text{Cov}(X,Y)` is the **covariance** between :math:`X` and :math:`Y`. + +**Independent Case** + +If :math:`X` and :math:`Y` are independent (or simply uncorrelated), then the expression simplifies to: + +.. math:: + \text{Var}(X + Y) = \text{Var}(X) + \text{Var}(Y) + +This result is extremely important in communications. For example, if a received signal is: + +.. math:: + R = S + N + +where :math:`S` is the signal and :math:`N` is independent noise, then the total power is just the sum of signal power and noise power. + +This is why SNR calculations are so straightforward. + +*********************************** +Complex Random Variables +*********************************** + +In SDR, we work extensively with **complex-valued signals**, which means we also work with complex random variables. A complex random variable has the form: + +.. 
math:: + Z = X + jY + +where :math:`X` and :math:`Y` are both real-valued random variables representing the in-phase (I) and quadrature (Q) components. + +**Complex Gaussian Noise** + +The most common complex random variable in wireless communications is **complex Gaussian noise**, where both :math:`X` and :math:`Y` are independent Gaussian random variables with the same variance. + +For example, if :math:`X \sim \mathcal{N}(\alpha_1, \sigma_1^2)` and :math:`Y \sim \mathcal{N}(\alpha_2, \sigma_2^2)` are independent, then the complex random variable :math:`Z = X + jY` has: + +* Mean: :math:`E[Z] = E[X] + jE[Y] = \alpha_1 + j\alpha_2` +* Variance (Power): :math:`\text{Var}(Z) = \text{Var}(X) + \text{Var}(Y) = \sigma_1^2 + \sigma_2^2` + +.. image:: ../_images/gaussian_IQ.png + :scale: 80% + :align: center + +This is why when we create complex Gaussian noise with unit power (variance = 1), we use: + +.. code-block:: python + + N = 10000 + n = (np.random.randn(N) + 1j*np.random.randn(N)) / np.sqrt(2) + print(np.var(n)) # ~ 1 + +The division by :math:`\sqrt{2}` ensures that the total power (sum of I and Q variances) equals 1. + +.. code-block:: python + + # Without normalization: + n_raw = np.random.randn(N) + 1j*np.random.randn(N) + print(np.var(np.real(n_raw))) # ~ 1 + print(np.var(np.imag(n_raw))) # ~ 1 + print(np.var(n_raw)) # ~ 2 (total power) + + # With normalization: + n_norm = n_raw / np.sqrt(2) + print(np.var(n_norm)) # ~ 1 (unit power) + +*********************************** +Random Processes +*********************************** + +So far we've discussed random variables—random values at a single point. A **random process** (also called a **stochastic process**) is a collection of random variables indexed by time: + +.. math:: + X(t) \quad \text{or} \quad X[n] \text{ for discrete time} + +At each time :math:`t`, :math:`X(t)` is a random variable. Think of a random process as a signal that evolves randomly over time. 
+ +Examples in wireless communications: + +* Noise at the receiver: :math:`N(t)` or :math:`N[n]` +* A signal experiencing time-varying fading: :math:`H(t)S(t)` +* Samples from an SDR: each batch is a realization of a random process + +**Stationary Processes** + +A random process is **stationary** if its statistical properties don't change over time. In particular, a **wide-sense stationary (WSS)** process has: + +* Constant mean: :math:`E[X(t)] = \mu` for all :math:`t` +* Autocorrelation that depends only on time difference: :math:`E[X(t)X(t+\tau)]` depends only on :math:`\tau`, not :math:`t` + +Many noise sources in wireless systems are approximately stationary, which simplifies analysis significantly. + +**White Noise** + +**White noise** is a random process where samples at different times are uncorrelated, and the power spectral density is constant across all frequencies. Additive White Gaussian Noise (AWGN) is both: + +* **White**: uncorrelated in time, flat power spectrum +* **Gaussian**: each sample is Gaussian distributed + +When we generate noise in Python using ``np.random.randn(N)``, each of the :math:`N` samples is an independent Gaussian random variable, creating a white noise process. + + +Independence and Correlation +############################# + +Two random variables :math:`X` and :math:`Y` are **independent** if knowing the value of one tells you nothing about the other. Mathematically, their joint PDF factors: + +.. math:: + f_{X,Y}(x,y) = f_X(x) \cdot f_Y(y) + +Independence is a strong condition. A weaker condition is **uncorrelated**, which means: + +.. math:: + E[XY] = E[X]E[Y] + +For Gaussian random variables, uncorrelated implies independent (this is a special property of Gaussians). + +In complex Gaussian noise, the I and Q components are independent: + +.. 
code-block:: python + + N = 10000 + I = np.random.randn(N) + Q = np.random.randn(N) + + # Check independence via correlation + correlation = np.corrcoef(I, Q)[0, 1] + print(f"Correlation between I and Q: {correlation:.4f}") # ~ 0 + + +*************************** +Further Reading +*************************** + +1. Papoulis, A., & Pillai, S. U. (2002). *Probability, Random Variables, and Stochastic Processes*. McGraw-Hill. +2. Kay, S. M. (2006). *Intuitive Probability and Random Processes using MATLAB®*. Springer. +3. https://en.wikipedia.org/wiki/Random_variable +4. https://en.wikipedia.org/wiki/Normal_distribution +5. https://en.wikipedia.org/wiki/Stochastic_process diff --git a/content/sync.rst b/content/sync.rst index b0ed913e..723e9083 100644 --- a/content/sync.rst +++ b/content/sync.rst @@ -483,3 +483,272 @@ You can think of it as 11 BPSK symbols. We can look at the autocorrelation of t You can see it's 11 (length of the sequence) in the center, and -1 or 0 for all other delays. It works well for finding the start of a frame because it essentially integrates 11 symbols worth of energy in an attempt to create a 1 bit spike in the output of the cross-correlation. In fact, the hardest part of performing start-of-frame detection is figuring out a good threshold. You don't want frames that aren't actually part of your protocol to trigger it. That means in addition to cross-correlation you also have to do some sort of power normalizing, which we won't consider here. In deciding a threshold, you have to make a trade-off between probability of detection and probability of false alarms. Remember that the frame header itself will have information, so some false alarms are OK; you will quickly find it is not actually a frame when you go to decode the header and the CRC inevitably fails (because it wasn't actually a frame). Yet while some false alarms are OK, missing a frame detection altogether is bad. 
Another family of sequences with great autocorrelation properties is Zadoff-Chu sequences, which are used in LTE.
+ print(f"Original Message: {message}") + + # Convert ASCII to bits + bits = [] + for char in message: + byte = format(ord(char), '08b') # Convert to 8-bit binary + bits.extend([int(b) for b in byte]) + bits = np.array(bits) + print(f"Total bits: {len(bits)}") + + # Step 2: QPSK Modulation + # Map bits to QPSK symbols: 00->-1-1j, 01->-1+1j, 10->1-1j, 11->1+1j + num_symbols = len(bits) // 2 + symbols = np.zeros(num_symbols, dtype=complex) + + for i in range(num_symbols): + bit_pair = (bits[2*i], bits[2*i+1]) + if bit_pair == (0, 0): + symbols[i] = -1-1j + elif bit_pair == (0, 1): + symbols[i] = -1+1j + elif bit_pair == (1, 0): + symbols[i] = 1-1j + else: # (1, 1) + symbols[i] = 1+1j + + # Normalize to unit power + symbols = symbols / np.sqrt(2) + + # Step 3: Pulse Shaping (Raised Cosine Filter) + sps = 8 # samples per symbol + num_taps = 101 + beta = 0.35 + Ts = sps + t = np.arange(-51, 52) + h = np.sinc(t/Ts) * np.cos(np.pi*beta*t/Ts) / (1 - (2*beta*t/Ts)**2) + + # Upsample symbols + upsampled = np.zeros(len(symbols) * sps, dtype=complex) + upsampled[::sps] = symbols + + # Apply pulse shaping + tx_samples = np.convolve(upsampled, h, mode='same') + + # Step 4: Channel Simulation + # Add frequency offset + fs = 1e6 + fo = 800 # 800 Hz frequency offset + Ts_sample = 1/fs + t_vec = np.arange(len(tx_samples)) * Ts_sample + channel_samples = tx_samples * np.exp(1j*2*np.pi*fo*t_vec) + + # Add fractional delay + delay = 0.4 + N_delay = 21 + n = np.arange(-N_delay//2, N_delay//2) + h_delay = np.sinc(n - delay) + h_delay *= np.hamming(N_delay) + h_delay /= np.sum(h_delay) + channel_samples = np.convolve(channel_samples, h_delay, mode='same') + + # Add AWGN noise + SNR_dB = 15 + signal_power = np.mean(np.abs(channel_samples)**2) + noise_power = signal_power / (10**(SNR_dB/10)) + noise = np.sqrt(noise_power/2) * (np.random.randn(len(channel_samples)) + + 1j*np.random.randn(len(channel_samples))) + rx_samples = channel_samples + noise + + # Step 5: Time Synchronization (Mueller 
& Muller) + # Interpolate for fractional timing recovery + samples_interpolated = signal.resample_poly(rx_samples, 16, 1) + + mu = 0 + out = np.zeros(len(rx_samples) + 10, dtype=complex) + out_rail = np.zeros(len(rx_samples) + 10, dtype=complex) + i_in = 0 + i_out = 2 + + while i_out < len(rx_samples) and i_in*16+16 < len(samples_interpolated): + out[i_out] = samples_interpolated[i_in*16 + int(mu*16)] + out_rail[i_out] = (np.real(out[i_out])>0)*2-1 + 1j*((np.imag(out[i_out])>0)*2-1) + x = (out_rail[i_out] - out_rail[i_out-2]) * np.conj(out[i_out-1]) + y = (out[i_out] - out[i_out-2]) * np.conj(out_rail[i_out-1]) + mm_val = np.real(y - x) + mu += sps + 0.01*mm_val # Reduced gain for stability + i_in += int(np.floor(mu)) + mu = mu - np.floor(mu) + i_out += 1 + + out = out[2:i_out] + time_synced = out + + # Step 6: Frequency Synchronization (Costas Loop) + N = len(time_synced) + phase = 0 + freq = 0 + alpha = 0.01 # Reduced proportional gain to prevent oscillations + beta = 0.001 # Reduced integral gain + freq_synced = np.zeros(N, dtype=complex) + + for i in range(N): + freq_synced[i] = time_synced[i] * np.exp(-1j*phase) + error = np.real(freq_synced[i]) * np.imag(freq_synced[i]) + + freq += beta * error + phase += freq + alpha * error + + while phase >= 2*np.pi: + phase -= 2*np.pi + while phase < 0: + phase += 2*np.pi + + # Step 7: QPSK Demodulation + # Skip first 35 symbols (allow sync algorithms to fully converge) + skip = min(35, len(freq_synced) // 2) # At least 35 symbols or half the data + demod_symbols = freq_synced[skip:] + + # Try all 4 phase rotations and pick the one with lowest error + best_ber = 1.0 + best_rotation = 0 + best_bits = None + + for phase_rotation in range(4): + # Rotate by 0, π/2, π, 3π/2 + rotation = np.exp(1j * np.pi / 2 * phase_rotation) + rotated_symbols = demod_symbols * rotation + + # Decode QPSK symbols back to bits + test_bits = [] + for sym in rotated_symbols: + I = np.real(sym) + Q = np.imag(sym) + + if I >= 0 and Q >= 0: + 
test_bits.extend([1, 1]) + elif I < 0 and Q >= 0: + test_bits.extend([0, 1]) + elif I >= 0 and Q < 0: + test_bits.extend([1, 0]) + else: + test_bits.extend([0, 0]) + + test_bits = np.array(test_bits) + if len(test_bits) > len(bits): + test_bits = test_bits[:len(bits)] + elif len(test_bits) < len(bits): + test_bits = np.pad(test_bits, (0, len(bits) - len(test_bits))) + + # Calculate BER for this rotation + ber_test = np.sum(bits != test_bits) / len(bits) + if ber_test < best_ber: + best_ber = ber_test + best_rotation = phase_rotation + best_bits = test_bits + + received_bits = best_bits + print(f"Best phase rotation: {best_rotation * 90}°") + + # Step 8: Decode ASCII and Calculate BER + received_bits = np.array(received_bits) + + # Trim to match original length + if len(received_bits) > len(bits): + received_bits = received_bits[:len(bits)] + elif len(received_bits) < len(bits): + # Pad with zeros if we have fewer bits + received_bits = np.pad(received_bits, (0, len(bits) - len(received_bits))) + + # Calculate BER + bit_errors = np.sum(bits != received_bits) + ber = bit_errors / len(bits) + + # Decode received message + received_message = "" + for i in range(len(received_bits) // 8): + byte_bits = received_bits[i*8:(i+1)*8] + byte_str = ''.join([str(b) for b in byte_bits]) + received_message += chr(int(byte_str, 2)) + + # Step 9: Display Results + print("\n" + "="*50) + print("E2E QPSK Communication System Results") + print("="*50) + print(f"Transmitted Message: {message}") + print(f"Received Message: {received_message}") + print(f"SNR: {SNR_dB} dB") + print(f"Frequency Offset: {fo} Hz") + print(f"Bit Error Rate: {ber:.6f} ({bit_errors}/{len(bits)} bit errors)") + print("="*50) + + # Plot constellation before and after sync + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4)) + ax1.plot(np.real(rx_samples[::sps]), np.imag(rx_samples[::sps]), '.') + ax1.set_title('Before Synchronization') + ax1.set_xlabel('I') + ax1.set_ylabel('Q') + ax1.grid(True) + 
ax1.axis('equal') + + ax2.plot(np.real(freq_synced[skip:]), np.imag(freq_synced[skip:]), '.') + ax2.set_title('After Synchronization') + ax2.set_xlabel('I') + ax2.set_ylabel('Q') + ax2.grid(True) + ax2.axis('equal') + + plt.tight_layout() + plt.savefig('sync_e2e_qpsk_constellation.png', dpi=150, bbox_inches='tight') + plt.show() + +When you run this simulation, you should see: + +- The original transmitted message printed to the console +- The received message (which should closely match the original, depending on SNR and channel conditions) +- The Bit Error Rate (BER), showing the ratio of incorrectly received bits +- Two constellation diagrams side-by-side: + + * **Left plot**: The received signal before synchronization, showing the effects of frequency offset, timing errors, and noise. The constellation points will appear rotated and spread out. + * **Right plot**: The signal after both time and frequency synchronization. The QPSK constellation should show four distinct clusters near the ideal symbol locations (±1±j)/√2. + +**Understanding the Results** + +This example demonstrates several important concepts: + +- **Phase Ambiguity**: QPSK (and most modulations) have inherent phase ambiguity. The Costas Loop can lock to any of four possible phase offsets (0°, 90°, 180°, 270°). The code resolves this by trying all four rotations and selecting the one with the lowest BER. + +- **Convergence Time**: The synchronization algorithms need time to converge. That's why we skip the first 35 symbols before demodulation. In a real system, you would use a preamble or training sequence for synchronization. + +- **SNR Impact**: Try adjusting the :code:`SNR_dB` parameter. Lower SNR values will increase the BER, while higher values should result in near-perfect reception. + +- **Frequency Offset**: The :code:`fo` parameter simulates oscillator drift or Doppler shift. Larger offsets make synchronization more challenging. 
+ +**Exercises** + +Try modifying the code to explore different scenarios: + +1. Change the SNR to see how it affects BER (try values from 5 dB to 25 dB) +2. Increase the frequency offset to 2000 Hz or 5000 Hz and observe the impact +3. Modify the message to be longer and see if the BER changes +4. Replace QPSK with BPSK by changing the symbol mapping (you'll need to adjust the demodulator accordingly) +5. Add a plot showing BER vs SNR by running the simulation multiple times + +This simulation ties together all the concepts we've covered in this chapter: pulse shaping, channel effects, timing recovery, and frequency synchronization. It demonstrates that even with impairments like frequency offset, timing errors, and noise, we can successfully recover digital data using the synchronization techniques we've learned. diff --git a/figure-generating-scripts/random_variables.py b/figure-generating-scripts/random_variables.py new file mode 100644 index 00000000..7f458918 --- /dev/null +++ b/figure-generating-scripts/random_variables.py @@ -0,0 +1,69 @@ +import numpy as np +import matplotlib.pyplot as plt + +# Generate 10,000 samples from standard Gaussian +x = np.random.randn(10000) + +# Create histogram to visualize the distribution +plt.hist(x, bins=50, density=True, alpha=0.7, edgecolor='black') +plt.xlabel('Value') +plt.ylabel('Probability Density') +plt.title('Gaussian Distribution (μ=0, σ²=1)') +plt.grid(True) +plt.show() + +# Simulation parameters +N = 10000 + +# Generate standard Gaussian random variables (mean=0, var=1) +x = np.random.randn(N) + +# Create different random variables by scaling and shifting +y1 = x # mean=0, var=1 +y2 = 2 * x # mean=0, var=4 +y3 = x + 3 # mean=3, var=1 +y4 = 0.5 * x - 1 # mean=-1, var=0.25 + +# Verify properties +signals = [y1, y2, y3, y4] +labels = ['y1: x', 'y2: 2x', 'y3: x+3', 'y4: 0.5x-1'] + +for i, (sig, label) in enumerate(zip(signals, labels)): + print(f"{label}") + print(f" Sample mean: {np.mean(sig):.3f}") + print(f" 
Sample variance: {np.var(sig):.3f}") + print() + +# Plot histograms +fig, axes = plt.subplots(2, 2, figsize=(10, 8)) +axes = axes.flatten() + +for i, (sig, label, ax) in enumerate(zip(signals, labels, axes)): + ax.hist(sig, bins=50, density=True, alpha=0.7, edgecolor='black') + ax.set_title(label) + ax.set_xlabel('Value') + ax.set_ylabel('Density') + ax.grid(True) + +plt.tight_layout() +plt.show() + +# Complex Gaussian noise demonstration +n_complex = (np.random.randn(N) + 1j*np.random.randn(N)) / np.sqrt(2) + +print("Complex Gaussian Noise (unit power):") +print(f" Real part variance: {np.var(np.real(n_complex)):.3f}") +print(f" Imag part variance: {np.var(np.imag(n_complex)):.3f}") +print(f" Total variance: {np.var(n_complex):.3f}") + +# Plot on IQ plane +plt.figure(figsize=(6, 6)) +plt.plot(np.real(n_complex[:1000]), np.imag(n_complex[:1000]), '.', alpha=0.3) +plt.xlabel('In-phase (I)') +plt.ylabel('Quadrature (Q)') +plt.title('Complex Gaussian Noise on IQ Plane') +plt.grid(True) +plt.axis('equal') +plt.xlim([-3, 3]) +plt.ylim([-3, 3]) +plt.show() \ No newline at end of file