leanprover-community · vihdzp · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025
diff --git a/Mathlib.lean b/Mathlib.lean
@@ -3444,6 +3444,7 @@ import Mathlib.Data.List.ProdSigma
 import Mathlib.Data.List.Range
 import Mathlib.Data.List.ReduceOption
 import Mathlib.Data.List.Rotate
+import Mathlib.Data.List.RunLength
 import Mathlib.Data.List.Scan
 import Mathlib.Data.List.Sections
 import Mathlib.Data.List.Shortlex

diff --git a/Mathlib/Data/List/Flatten.lean b/Mathlib/Data/List/Flatten.lean
@@ -3,8 +3,7 @@ Copyright (c) 2017 Mario Carneiro. All rights reserved.
 Released under Apache 2.0 license as described in the file LICENSE.
 Authors: Sébastien Gouëzel, Floris van Doorn, Mario Carneiro, Martin Dvorak
 -/
-import Mathlib.Tactic.GCongr.Core
-import Mathlib.Util.AssertExists
+import Mathlib.Data.List.Induction
 
 /-!
 # Join of a list of lists
@@ -42,4 +41,15 @@ theorem append_flatten_map_append (L : List (List α)) (x : List α) :
     x ++ (L.map (· ++ x)).flatten = (L.map (x ++ ·)).flatten ++ x := by
   induction L with grind
 
+theorem head_flatten_of_head_ne_nil {l : List (List α)} (hl : l ≠ []) (hl' : l.head hl ≠ []) :
+    l.flatten.head (flatten_ne_nil_iff.2 ⟨_, head_mem hl, hl'⟩) =
+      (l.head hl).head hl' := by
+  cases l with grind
+
+theorem getLast_flatten_of_getLast_ne_nil {l : List (List α)} (hl : l ≠ [])
+    (hl' : l.getLast hl ≠ []) :
+    l.flatten.getLast (flatten_ne_nil_iff.2 ⟨_, getLast_mem hl, hl'⟩) =
+      (l.getLast hl).getLast hl' := by
+  cases l using reverseRecOn with grind
+
 end List
diff --git a/Mathlib/Data/List/RunLength.lean b/Mathlib/Data/List/RunLength.lean
@@ -0,0 +1,151 @@
+/-
+Copyright (c) 2024 Violeta Hernández Palacios. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Violeta Hernández Palacios
+-/
+import Mathlib.Data.List.SplitBy
+import Mathlib.Data.PNat.Defs
+
+/-!
+# Run-length encoding
+-/
+
+variable {α : Type*}
+
+namespace List
+
+variable [DecidableEq α]
+
+/-- Run-length encoding of a list. Returns a list of pairs `(n, a)` representing consecutive groups
+of `a` of length `n`. -/
+def RunLength (l : List α) : List (ℕ+ × α) :=
+  (l.splitBy (· == ·)).pmap
+    (fun m hm ↦ (⟨m.length, length_pos_of_ne_nil hm⟩, m.head hm))
+    (fun _ ↦ ne_nil_of_mem_splitBy)
+
+@[simp]
+theorem runLength_nil : RunLength ([] : List α) = [] :=
+  rfl
+
+@[simp]
+theorem runLength_eq_nil {l : List α} : RunLength l = [] ↔ l = [] := by
+  rw [RunLength, pmap_eq_nil_iff, splitBy_eq_nil]
+
+theorem runLength_append {n : ℕ} (hn : 0 < n) {a : α} {l : List α} (ha : a ∉ l.head?) :
+    (replicate n a ++ l).RunLength = (⟨n, hn⟩, a) :: l.RunLength := by
+  suffices splitBy (· == ·) (replicate n a ++ l) = replicate n a :: l.splitBy (· == ·) by
+    simp [this, RunLength]
+  apply (splitBy_append ..).trans
+  · rw [splitBy_beq_replicate hn.ne', singleton_append]
+  · grind
+
+@[simp]
+theorem runLength_replicate {n : ℕ} (hn : 0 < n) (a : α) :
+    RunLength (replicate n a) = [(⟨n, hn⟩, a)] := by
+  convert runLength_append hn (a := a) (l := []) _ <;> simp
+
+theorem runLength_append_cons {n : ℕ} (hn : 0 < n) {a b : α} {l : List α} (h : a ≠ b) :
+    RunLength (replicate n a ++ b :: l) = (⟨n, hn⟩, a) :: (b :: l).RunLength := by
+  apply runLength_append hn
+  rwa [head?_cons, Option.mem_some_iff, eq_comm]
+
+@[simp]
+theorem flatten_map_runLength (l : List α) :
+    (l.RunLength.map fun x ↦ replicate x.1 x.2).flatten = l := by
+  rw [RunLength, map_pmap, pmap_eq_self.2, flatten_splitBy]
+  intro m hm
+  have := isChain_of_mem_splitBy hm
+  simp_rw [beq_iff_eq, isChain_eq_iff_eq_replicate] at this
+  exact (this _ (head_mem_head? _)).symm
+
+theorem runLength_injective : Function.Injective (List.RunLength (α := α)) := by
+  intro l m h
+  have := flatten_map_runLength m
+  rwa [← h, flatten_map_runLength] at this
+
+@[simp]
+theorem runLength_inj {l m : List α} : l.RunLength = m.RunLength ↔ l = m :=
+  runLength_injective.eq_iff
+
+theorem runLength_flatten_map {l : List (ℕ+ × α)} (hl : l.IsChain fun x y ↦ x.2 ≠ y.2) :
+    (l.map fun x ↦ replicate x.1 x.2).flatten.RunLength = l := by
+  induction l with
+  | nil => rfl
+  | cons x l IH =>
+    rw [isChain_cons] at hl
+    rw [map_cons, flatten_cons, runLength_append, IH hl.2]
+    · rfl
+    · cases l with
+      | nil => simp
+      | cons y l => simpa [head?_replicate] using (hl.1 y head?_cons).symm
+
+private theorem isChain_runLengthAux {α : Type*} {l : List (List α)} (hn : ∀ m ∈ l, m ≠ [])
+    (h : ∀ m (hm : m ∈ l), m.head (hn m hm) = m.getLast (hn m hm))
+    (hl : l.IsChain fun a b ↦ ∃ ha hb, a.getLast ha ≠ b.head hb) :
+    (l.pmap List.head hn).IsChain Ne := by
+  induction l with
+  | nil => exact isChain_nil
+  | cons a l IH => cases l with grind
+
+theorem isChain_runLength (l : List α) : l.RunLength.IsChain fun x y ↦ x.2 ≠ y.2 := by
+  rw [RunLength]
+  apply (List.isChain_map (β := ℕ+ × α) Prod.snd).1
+  rw [map_pmap]
+  apply isChain_runLengthAux
+  · intro m hm
+    have := isChain_of_mem_splitBy hm
+    simp_rw [beq_iff_eq, isChain_eq_iff_eq_replicate] at this
+    generalize_proofs hm
+    obtain ⟨n, a, hm'⟩ : ∃ n a, m = replicate n a := ⟨_, _, this _ (head_mem_head? hm)⟩
+    simp [hm']
+  · simpa using isChain_getLast_head_splitBy (· == ·) l
+
+private def runLengthRecOnAux (l : List (ℕ+ × α)) {p : List α → Sort*}
+    (hc : l.IsChain fun x y ↦ x.2 ≠ y.2) (hn : p [])
+    (hi : ∀ (n : ℕ+) {a l}, a ∉ l.head? → p l → p (replicate n a ++ l)) :
+    p (l.map fun x ↦ replicate x.1 x.2).flatten :=
+  match l with
+  | [] => hn
+  | (n, a) :: l => by
+    rw [isChain_cons] at hc
+    apply hi _ _ (runLengthRecOnAux l hc.2 hn hi)
+    cases l with
+    | nil => simp
+    | cons x l => simpa [head?_replicate] using (hc.1 x head?_cons).symm
+
+/-- Recursion on the run-length encoding of a list. -/
+@[elab_as_elim]
+def runLengthRecOn (l : List α) {p : List α → Sort*} (nil : p [])
+    (append : ∀ (n : ℕ+) (a l), a ∉ l.head? → p l → p (replicate n a ++ l)) : p l :=
+  cast (congr_arg p (flatten_map_runLength l))
+    (runLengthRecOnAux l.RunLength (isChain_runLength _) nil append)
+
+@[simp]
+theorem runLengthRecOn_nil {p : List α → Sort*} (nil : p [])
+    (append : ∀ (n : ℕ+) (a l), a ∉ l.head? → p l → p (replicate n a ++ l)) :
+    runLengthRecOn [] nil append = nil :=
+  rfl
+
+theorem runLengthRecOn_append {p : List α → Sort*} {n : ℕ} (h : 0 < n) {a : α} {l : List α}
+    (hl : a ∉ l.head?) (nil : p [])
+    (append : ∀ (n : ℕ+) (a l), a ∉ l.head? → p l → p (replicate n a ++ l)) :
+    runLengthRecOn (replicate n a ++ l) nil append =
+      append ⟨n, h⟩ _ _ hl (runLengthRecOn l nil append) := by
+  rw [runLengthRecOn, runLengthRecOn, cast_eq_iff_heq]
+  have H := runLength_append h hl
+  trans runLengthRecOnAux _ (H ▸ isChain_runLength _) nil append
+  · congr!
+  · rw [runLengthRecOnAux]
+    congr! <;> simp
+
+theorem splitBy_beq (l : List α) :
+    l.splitBy (· == ·) = l.RunLength.map fun x ↦ replicate x.1 x.2 := by
+  induction l using runLengthRecOn with
+  | nil => rfl
+  | append n a l ha IH =>
+    rw [splitBy_append, runLength_append _ ha, map_cons, IH]
+    · simp
+    · simp
+    · grind
+
+end List
diff --git a/Mathlib/Data/List/SplitBy.lean b/Mathlib/Data/List/SplitBy.lean
@@ -4,14 +4,15 @@ Released under Apache 2.0 license as described in the file LICENSE.
 Authors: Violeta Hernández Palacios
 -/
 import Mathlib.Data.List.Chain
+import Mathlib.Data.List.Flatten
 
 /-!
 # Split a list into contiguous runs of elements which pairwise satisfy a relation.
 
 This file provides the basic API for `List.splitBy` which is defined in Core.
 The main results are the following:
 
-- `List.join_splitBy`: the lists in `List.splitBy` join to the original list.
+- `List.flatten_splitBy`: the lists in `List.splitBy` join to the original list.
 - `List.nil_notMem_splitBy`: the empty list is not contained in `List.splitBy`.
 - `List.isChain_of_mem_splitBy`: any two adjacent elements in a list in
   `List.splitBy` are related by the specified relation.
@@ -51,6 +52,14 @@ theorem flatten_splitBy (r : α → α → Bool) (l : List α) : (l.splitBy r).f
   | nil => rfl
   | cons _ _ => flatten_splitByLoop
 
+@[simp]
+theorem splitBy_eq_nil {r : α → α → Bool} {l : List α} : l.splitBy r = [] ↔ l = [] := by
+  have := flatten_splitBy r l
+  refine ⟨fun _ ↦ ?_, ?_⟩ <;> simp_all
+
+theorem splitBy_ne_nil {r : α → α → Bool} {l : List α} : l.splitBy r ≠ [] ↔ l ≠ [] :=
+  splitBy_eq_nil.not
+
 private theorem nil_notMem_splitByLoop {r : α → α → Bool} {l : List α} {a : α} {g : List α} :
     [] ∉ splitBy.loop r l a g [] := by
   induction l generalizing a g with
@@ -65,17 +74,28 @@ private theorem nil_notMem_splitByLoop {r : α → α → Bool} {l : List α} {a
 
 @[deprecated (since := "2025-05-23")] alias nil_not_mem_splitByLoop := nil_notMem_splitByLoop
 
+@[simp]
 theorem nil_notMem_splitBy (r : α → α → Bool) (l : List α) : [] ∉ l.splitBy r :=
   match l with
   | nil => not_mem_nil
   | cons _ _ => nil_notMem_splitByLoop
 
 @[deprecated (since := "2025-05-23")] alias nil_not_mem_splitBy := nil_notMem_splitBy
 
-theorem ne_nil_of_mem_splitBy (r : α → α → Bool) {l : List α} (h : m ∈ l.splitBy r) : m ≠ [] := by
+theorem ne_nil_of_mem_splitBy {r : α → α → Bool} {l : List α} (h : m ∈ l.splitBy r) : m ≠ [] := by
   rintro rfl
   exact nil_notMem_splitBy r l h
 
+theorem head_head_splitBy (r : α → α → Bool) {l : List α} (hn : l ≠ []) :
+    ((l.splitBy r).head (splitBy_ne_nil.2 hn)).head
+      (ne_nil_of_mem_splitBy (head_mem _)) = l.head hn := by
+  simp_rw [← head_flatten_of_head_ne_nil, flatten_splitBy]
+
+theorem getLast_getLast_splitBy (r : α → α → Bool) {l : List α} (hn : l ≠ []) :
+    ((l.splitBy r).getLast (splitBy_ne_nil.2 hn)).getLast
+      (ne_nil_of_mem_splitBy (getLast_mem _)) = l.getLast hn := by
+  simp_rw [← getLast_flatten_of_getLast_ne_nil, flatten_splitBy]
+
 private theorem isChain_of_mem_splitByLoop {r : α → α → Bool} {l : List α} {a : α} {g : List α}
     (hga : ∀ b ∈ g.head?, r b a) (hg : g.IsChain fun y x ↦ r x y)
     (h : m ∈ splitBy.loop r l a g []) : m.IsChain fun x y ↦ r x y := by
@@ -145,6 +165,100 @@ theorem isChain_getLast_head_splitBy (r : α → α → Bool) (l : List α) :
     apply isChain_getLast_head_splitByLoop _ not_mem_nil isChain_nil
     rintro _ ⟨⟩
 
+private theorem splitByLoop_append {r : α → α → Bool} {l g : List α} {a : α}
+    (h : (g.reverse ++ a :: l).IsChain fun x y ↦ r x y)
+    (ha : ∀ x ∈ m.head?, r ((a :: l).getLast (cons_ne_nil a l)) x = false) :
+    splitBy.loop r (l ++ m) a g [] = (g.reverse ++ a :: l) :: m.splitBy r := by
+  induction l generalizing a g with
+  | nil =>
+    rw [nil_append]
+    cases m with
+    | nil => simp [splitBy.loop]
+    | cons c m => simp_all [splitBy.loop, splitByLoop_eq_append [_], splitBy]
+  | cons b l IH => simp_all [splitBy.loop]
+
+theorem splitBy_of_isChain {r : α → α → Bool} {l : List α} (hn : l ≠ [])
+    (h : l.IsChain fun x y ↦ r x y) : splitBy r l = [l] := by
+  cases l with
+  | nil => contradiction
+  | cons a l => rw [splitBy, ← append_nil l, splitByLoop_append] <;> simp [h]
+
+theorem splitBy_eq_singleton {r : α → α → Bool} {l : List α} :
+    splitBy r l = [l] ↔ l ≠ [] ∧ l.IsChain fun x y ↦ r x y := by
+  refine ⟨fun h ↦ ⟨fun _ ↦ ?_, ?_⟩, ?_⟩ <;>
+    simp_all [splitBy_of_isChain, @isChain_of_mem_splitBy _ l r l]
+
+private theorem splitBy_append_of_isChain {r : α → α → Bool} {l : List α} (hn : l ≠ [])
+    (h : l.IsChain fun x y ↦ r x y) (ha : ∀ x ∈ m.head?, r (l.getLast hn) x = false) :
+    (l ++ m).splitBy r = l :: m.splitBy r := by
+  cases l with
+  | nil => contradiction
+  | cons a l => rw [cons_append, splitBy, splitByLoop_append h ha]; simp
+
+theorem splitBy_flatten {r : α → α → Bool} {l : List (List α)} (hn : [] ∉ l)
+    (hc : ∀ m ∈ l, m.IsChain fun x y ↦ r x y)
+    (hc' : l.IsChain fun a b ↦ ∃ ha hb, r (a.getLast ha) (b.head hb) = false) :
+    l.flatten.splitBy r = l := by
+  induction l with
+  | nil => rfl
+  | cons a l IH =>
+    rw [mem_cons, not_or, eq_comm] at hn
+    rw [flatten_cons, splitBy_append_of_isChain hn.1 (hc _ mem_cons_self),
+      IH hn.2 (fun m hm ↦ hc _ (mem_cons_of_mem a hm)) hc'.tail]
+    intro y hy
+    rw [← head_of_mem_head? hy]
+    rw [isChain_cons] at hc'
+    obtain ⟨x, hx, _⟩ := flatten_ne_nil_iff.1 (ne_nil_of_mem (mem_of_mem_head? hy))
+    obtain ⟨_, _, H⟩ := hc'.1 (l.head (ne_nil_of_mem hx)) (head_mem_head? _)
+    rwa [head_flatten_of_head_ne_nil]
+
+/-- A characterization of `splitBy m r` as the unique list `l` such that:
+
+* The lists of `l` join to `m`.
+* It does not contain the empty list.
+* Every list in `l` is `IsChain` of `r`.
+* The last element of each list in `l` is not related by `r` to the head of the next.
+-/
+theorem splitBy_eq_iff {r : α → α → Bool} {l : List (List α)} :
+    m.splitBy r = l ↔ m = l.flatten ∧ [] ∉ l ∧ (∀ m ∈ l, m.IsChain fun x y ↦ r x y) ∧
+      l.IsChain fun a b ↦ ∃ ha hb, r (a.getLast ha) (b.head hb) = false := by
+  constructor
+  · rintro rfl
+    exact ⟨(flatten_splitBy r m).symm, nil_notMem_splitBy r m, fun _ ↦ isChain_of_mem_splitBy,
+      isChain_getLast_head_splitBy r m⟩
+  · rintro ⟨rfl, hn, hc, hc'⟩
+    exact splitBy_flatten hn hc hc'
+
+theorem splitBy_append {r : α → α → Bool} {l : List α}
+    (ha : ∀ x ∈ l.getLast?, ∀ y ∈ m.head?, r x y = false) :
+    (l ++ m).splitBy r = l.splitBy r ++ m.splitBy r := by
+  obtain rfl | hl := eq_or_ne l []
+  · simp
+  obtain rfl | hm := eq_or_ne m []
+  · simp
+  rw [splitBy_eq_iff]
+  refine ⟨by simp, by simp, ?_, ?_⟩
+  · aesop (add apply unsafe isChain_of_mem_splitBy)
+  rw [isChain_append]
+  refine ⟨isChain_getLast_head_splitBy _ _, isChain_getLast_head_splitBy _ _, fun x hx y hy ↦ ?_⟩
+  use ne_nil_of_mem_splitBy (mem_of_mem_getLast? hx), ne_nil_of_mem_splitBy (mem_of_mem_head? hy)
+  apply ha
+  · simp_rw [← getLast_of_mem_getLast? hx, getLast_getLast_splitBy _ hl]
+    exact getLast_mem_getLast? _
+  · simp_rw [← head_of_mem_head? hy, head_head_splitBy _ hm]
+    exact head_mem_head? _
+
+theorem splitBy_append_cons {r : α → α → Bool} {l : List α} {a : α} (m : List α)
+    (ha : ∀ x ∈ l.getLast?, r x a = false) :
+    (l ++ a :: m).splitBy r = l.splitBy r ++ (a :: m).splitBy r := by
+  apply splitBy_append
+  simpa
+
+@[simp]
+theorem splitBy_beq_replicate {n : ℕ} (hn : n ≠ 0) (a : α) [DecidableEq α] :
+    splitBy (· == ·) (replicate n a) = [replicate n a] := by
+  simp [hn, splitBy_eq_singleton, isChain_replicate_of_rel]
+
 @[deprecated (since := "2025-09-24")]
 alias chain'_getLast_head_splitBy := isChain_getLast_head_splitBy