import pandas as pd
import matplotlib.pyplot as plt
import os
import glob

# Configuration
# Release identifiers covered by the analysis: "f7" through "f43" inclusive,
# listed from the lowest release number to the highest.
RELEASES = [f"f{number}" for number in range(7, 44)]
# Root directory that is combined with a release id to locate the input CSVs.
BASE_PATH = "../generated"

# Automatic language discovery from f43
# Scan {BASE_PATH}/f43/languages/ and treat every *.csv file found there as one
# language; the file name without its extension is the language code
# (e.g., "gu.csv" -> "gu").
# NOTE(review): the captured output of this cell lists a code named 'error' -
# confirm whether error.csv is a real language file or should be excluded.

all_languages = set()
discovery_release = "f43"  # Use latest release for discovery
discovery_path = f"{BASE_PATH}/{discovery_release}/languages/"
if os.path.isdir(discovery_path):
    csv_files = glob.glob("*.csv", root_dir=discovery_path)
    for csv_file in csv_files:
        # splitext removes only the trailing extension; str.replace would also
        # delete a ".csv" that happens to occur inside the file name.
        lang_code, _extension = os.path.splitext(os.path.basename(csv_file))
        all_languages.add(lang_code)
else:
    # Make the failure visible instead of silently continuing with no languages.
    print(f"Warning: Language directory not found: {discovery_path}")

all_languages = sorted(all_languages)
print(f"Discovered {len(all_languages)} languages: {all_languages}")

Discovered 345 languages: ['aa', 'ab', 'ace', 'ach', 'ae', 'af', 'ain', 'ak', 'aln', 'am', 'an', 'ang', 'anp', 'ar', 'ar_DZ', 'ar_EG', 'ar_LY', 'ar_MA', 'ar_SA', 'as', 'ast', 'av', 'ay', 'ayc', 'az', 'azb', 'ba', 'bal', 'ban', 'bar', 'be', 'be_Latn', 'bem', 'ber', 'bg', 'bh', 'bi', 'bin', 'bm', 'bn', 'bn_BD', 'bn_IN', 'bo', 'bqi', 'br', 'brx', 'bs', 'byn', 'ca', 'ca@valencia', 'cak', 'ce', 'cgg', 'ch', 'chr', 'ckb', 'ckb_IR', 'cnr', 'co', 'cpf', 'cpp', 'cr', 'crh', 'cs', 'csb', 'cu', 'cv', 'cy', 'da', 'de', 'de_AT', 'de_CH', 'doi', 'dsb', 'dv', 'dz', 'ee', 'el', 'en', 'en@pirate', 'en_AU', 'en_CA', 'en_GB', 'en_IE', 'en_IN', 'en_NZ', 'en_Shaw', 'en_US', 'en_ZA', 'enm', 'eo', 'error', 'es', 'es_419', 'es_AR', 'es_CL', 'es_CO', 'es_CR', 'es_DO', 'es_EC', 'es_MX', 'es_NI', 'es_PA', 'es_PE', 'es_PR', 'es_SV', 'es_US', 'es_UY', 'es_VE', 'et', 'eu', 'fa', 'ff', 'fi', 'fil', 'fj', 'fo', 'fr', 'fr_BE', 'fr_CA', 'fr_CH', 'frp', 'fur', 'fy', 'ga', 'gaa', 'gd', 'gez', 'gl', 'gn', 'grc', 'gu', 'guc', 'gug', 'gv', 'ha', 'haw', 'he', 'he_IL', 'hi', 'hne', 'ho', 'hr', 'hsb', 'ht', 'hu', 'hus', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'ilo', 'io', 'is', 'isv', 'it', 'iu', 'ja', 'jam', 'jbo', 'jv', 'ka', 'kab', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kmr', 'kmr_Latn', 'kn', 'ko', 'kok', 'kos', 'kr', 'krl', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lfn', 'lg', 'li', 'lmo', 'ln', 'lo', 'lt', 'ltg', 'lu', 'lv', 'lzh', 'mai', 'man', 'mg', 'mh', 'mhr', 'mi', 'mjw', 'mk', 'ml', 'mn', 'mni', 'mnk', 'mnw', 'mr', 'mrh', 'ms', 'ms_Arab', 'mt', 'mus', 'my', 'na', 'nah', 'nan_Hant', 'nap', 'nb_NO', 'nd', 'nds', 'ne', 'ng', 'nl', 'nl_BE', 'nn', 'nqo', 'nr', 'nso', 'nv', 'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pa_PK', 'pam', 'pap', 'pi', 'pl', 'pms', 'prs', 'ps', 'pt', 'pt_BR', 'pt_PT', 'qu', 'rm', 'rn', 'ro', 'ro_MD', 'rom', 'ru', 'ru_UA', 'rue', 'rw', 'sa', 'sah', 'sat', 'sc', 'sco', 'sd', 'se', 'sg', 'sgs', 'shn', 'si', 'sid', 'sk', 'sl', 'sm', 'sma', 'smj', 'sn', 'so', 'son', 'sq', 
'sr', 'sr@ijekavian', 'sr@ijekavian_Latn', 'sr_Cyrl', 'sr_Latn', 'ss', 'st', 'su', 'sv', 'sw', 'sw_TZ', 'szl', 'ta', 'te', 'tet', 'tg', 'th', 'ti', 'tig', 'tk', 'tl', 'tlh', 'tn', 'to', 'tok', 'tpi', 'tr', 'ts', 'tt', 'tt@iqtelif', 'tvl', 'tw', 'ty', 'tyv', 'tzm', 'udm', 'ug', 'uk', 'ur', 'ur_PK', 'uz', 'uz_Latn', 've', 'vec', 'vi', 'vo', 'wa', 'wae', 'wal', 'wo', 'xh', 'yi', 'yo', 'yue_Hant', 'za', 'zgh', 'zh_Hans', 'zh_Hans_SG', 'zh_Hant', 'zh_Hant_HK', 'zu']

# Load distribution data for all releases
# Each {BASE_PATH}/<release>/release.csv is read into distribution_data; its
# totalsourcewords column is used later for distribution-wide progress.
# When the file has a release_date column, the first row's value is recorded in
# release_dates for chronological ordering.

distribution_data = {}
release_dates = {}

for release in RELEASES:
    dist_path = f"{BASE_PATH}/{release}/release.csv"
    if not os.path.exists(dist_path):
        print(f"Warning: File not found: {dist_path}")
        continue

    df = pd.read_csv(dist_path)
    distribution_data[release] = df

    # Extract release_date if available; a file without data rows has no
    # first row to read from.
    if "release_date" in df.columns and not df.empty:
        raw_date = df["release_date"].iloc[0]
        # Stored dates can carry trailing blanks (e.g. "2007-05-31 "); strip
        # them so every date has the same shape when parsed or displayed.
        release_dates[release] = raw_date.strip() if isinstance(raw_date, str) else raw_date

print(f"Loaded distribution data for: {list(distribution_data.keys())}")
print(f"Release dates: {release_dates}")

Loaded distribution data for: ['f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43']
Release dates: {'f7': '2007-05-31 ', 'f8': '2007-11-08 ', 'f9': '2008-05-13 ', 'f10': '2008-11-25 ', 'f11': '2009-06-09 ', 'f12': '2009-11-17 ', 'f13': '2010-05-25 ', 'f14': '2010-11-02 ', 'f15': '2011-05-24 ', 'f16': '2011-11-08 ', 'f17': '2012-05-29 ', 'f18': '2013-01-15 ', 'f19': '2013-07-02 ', 'f20': '2013-12-17 ', 'f21': '2014-12-09 ', 'f22': '2015-05-26 ', 'f23': '2015-11-03 ', 'f24': '2016-06-21 ', 'f25': '2016-11-22 ', 'f26': '2017-07-11 ', 'f27': '2017-11-14 ', 'f28': '2018-05-01 ', 'f29': '2018-10-30 ', 'f30': '2019-04-30 ', 'f31': '2019-10-29 ', 'f32': '2020-04-28 ', 'f33': '2020-10-27 ', 'f34': '2021-04-27 ', 'f35': '2021-11-02 ', 'f36': '2022-05-10 ', 'f37': '2022-11-15 ', 'f38': '2023-04-18 ', 'f39': '2023-11-07', 'f40': '2024-04-23', 'f41': '2024-10-29', 'f42': '2025-04-15', 'f43': '2025-10-28'}

# Build the release timeline table (release id plus release date) and derive
# RELEASES_SORTED, the release ids in chronological order.

timeline_rows = []
for rel in RELEASES:
    timeline_rows.append({"release": rel, "release_date": release_dates.get(rel, "Unknown")})
release_timeline = pd.DataFrame(timeline_rows)

# Date-based sorting is only attempted when every release has a recorded date;
# otherwise the order given in RELEASES is used unchanged.
every_date_known = (release_timeline["release_date"] != "Unknown").all()
if not every_date_known:
    RELEASES_SORTED = RELEASES
    print(f"Using default release order: {RELEASES_SORTED}")
else:
    release_timeline["date_parsed"] = pd.to_datetime(release_timeline["release_date"], format='mixed')
    release_timeline = release_timeline.sort_values("date_parsed")
    RELEASES_SORTED = release_timeline["release"].tolist()
    print(f"Chronologically sorted releases: {RELEASES_SORTED}")

# Notebook display of the two documentation columns.
release_timeline[["release", "release_date"]]

Chronologically sorted releases: ['f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43']

# Read the per-language CSV of every discovered language for every release.
# language_data maps (release, lang) -> DataFrame read from
# {BASE_PATH}/<release>/languages/<lang>.csv; combinations whose file does not
# exist are left out.

candidate_paths = {
    (release, lang): f"{BASE_PATH}/{release}/languages/{lang}.csv"
    for lang in all_languages
    for release in RELEASES
}

language_data = {
    key: pd.read_csv(path)
    for key, path in candidate_paths.items()
    if os.path.exists(path)
}

print(f"Loaded {len(language_data)} language-release combinations")

Loaded 10758 language-release combinations

# Derive one metrics record per loaded (release, language) pair:
# - translated_words: sum of the translatedsourcewords column
# - language_total_words: sum of the totalsourcewordssum column
# - distribution_total_words: first totalsourcewords value of the release's
#   entry in distribution_data (None when the release has no entry)
# - language_progress / distribution_progress: translated words as a percentage
#   of the respective total, or 0 when that total is missing or zero

metrics = []

for (release, lang), lang_frame in language_data.items():
    translated_words = lang_frame["translatedsourcewords"].sum()
    language_total_words = lang_frame["totalsourcewordssum"].sum()

    release_frame = distribution_data.get(release)
    if release_frame is None:
        distribution_total_words = None
    else:
        distribution_total_words = release_frame["totalsourcewords"].iloc[0]

    # Guard both divisions against an unusable denominator.
    if language_total_words > 0:
        language_progress = translated_words / language_total_words * 100
    else:
        language_progress = 0

    if distribution_total_words:
        distribution_progress = translated_words / distribution_total_words * 100
    else:
        distribution_progress = 0

    metrics.append(dict(
        language=lang,
        release=release,
        translated_words=translated_words,
        language_total_words=language_total_words,
        distribution_total_words=distribution_total_words,
        language_progress=language_progress,
        distribution_progress=distribution_progress,
    ))

metrics_df = pd.DataFrame(metrics)
print(f"Computed metrics for {len(metrics_df)} language-release combinations")
metrics_df.head(15)

Computed metrics for 10758 language-release combinations

# Pivot data to have languages as rows and releases as columns
# Create separate pivots for each metric


def _pivot_metric(frame, metric_name, release_order):
    """Return one metric as a wide table: a row per language, a column per release.

    DataFrame.pivot orders the release labels as strings ("f10" sorts before
    "f7"), so the release columns are rearranged to follow release_order.
    Labels that are not in release_order keep their pivot order and go last.
    """
    wide = frame.pivot(index="language", columns="release", values=metric_name)
    ordered = [rel for rel in release_order if rel in wide.columns]
    already_placed = set(ordered)
    ordered += [col for col in wide.columns if col not in already_placed]
    return wide[ordered].reset_index()


translated_words_pivot = _pivot_metric(metrics_df, "translated_words", RELEASES_SORTED)
language_progress_pivot = _pivot_metric(metrics_df, "language_progress", RELEASES_SORTED)
distribution_progress_pivot = _pivot_metric(metrics_df, "distribution_progress", RELEASES_SORTED)

print("Pivoted data for all three metrics")
translated_words_pivot.head()

Pivoted data for all three metrics

# Add a boolean positive_trend column to each metric pivot.
# A language has a positive trend when its f43 value is greater than its f35
# value. When either release column is absent from a pivot, a warning is
# printed and the whole column is set to False.

trend_targets = [
    (translated_words_pivot, "Warning: Cannot compute translated words trend - missing f35 or f43 data"),
    (language_progress_pivot, "Warning: Cannot compute language progress trend - missing f35 or f43 data"),
    (distribution_progress_pivot, "Warning: Cannot compute distribution progress trend - missing f35 or f43 data"),
]

for pivot_table, missing_warning in trend_targets:
    has_both_releases = all(col in pivot_table.columns for col in ("f35", "f43"))
    if not has_both_releases:
        print(missing_warning)
        pivot_table["positive_trend"] = False
        continue
    pivot_table["positive_trend"] = pivot_table["f43"] > pivot_table["f35"]

print("Computed positive trends for all three metrics")
translated_words_pivot.head()

Computed positive trends for all three metrics

# Summary counts behind the four bars of the overview chart.
# The three trend counts only consider languages that have a value for both
# f35 and f43 in the corresponding pivot.

trend_window = ["f35", "f43"]

# Bar 1: number of discovered languages
total_languages = len(all_languages)

# Bar 2: positive translated words trend
translated_valid = translated_words_pivot[
    translated_words_pivot[trend_window].notna().all(axis=1)
]
positive_translated_words = translated_valid["positive_trend"].sum()

# Bar 3: positive language progress trend
progress_valid = language_progress_pivot[
    language_progress_pivot[trend_window].notna().all(axis=1)
]
positive_language_progress = progress_valid["positive_trend"].sum()

# Bar 4: positive distribution progress trend
dist_progress_valid = distribution_progress_pivot[
    distribution_progress_pivot[trend_window].notna().all(axis=1)
]
positive_distribution_progress = dist_progress_valid["positive_trend"].sum()

print(
    f"Total languages in f43: {total_languages}",
    f"Languages with positive translated words trend: {positive_translated_words}",
    f"Languages with positive language progress trend: {positive_language_progress}",
    f"Languages with positive distribution progress trend: {positive_distribution_progress}",
    sep="\n",
)

Total languages in f43: 345
Languages with positive translated words trend: 174
Languages with positive language progress trend: 138
Languages with positive distribution progress trend: 90

# Create vertical bar chart with four bars
# Use hatch patterns for accessibility (no colors)

fig, ax = plt.subplots(figsize=(10, 6))

categories = [
    "Total Languages\n(f43)",
    "Positive\nTranslated Words\nTrend",
    "Positive\nLanguage Progress\nTrend",
    "Positive\nDistribution Progress\nTrend"
]

values = [
    total_languages,
    positive_translated_words,
    positive_language_progress,
    positive_distribution_progress
]

# Define hatch patterns for each bar
hatches = ['', '///', '\\\\\\', 'xxx']

bars = ax.bar(categories, values, color='white', edgecolor='black', linewidth=1.5)

# Apply different hatch patterns to each bar
for bar, hatch in zip(bars, hatches):
    bar.set_hatch(hatch)

# Add value labels on top of each bar (bars sit at x positions 0..3)
for position, val in enumerate(values):
    ax.text(position, val + 0.1, str(int(val)), ha='center', va='bottom', fontsize=11, fontweight='bold')

# Build title with release dates only when both endpoints have one.
# The dates are stripped because stored values can carry trailing blanks,
# which would otherwise show up as doubled spaces in the title.
start_date = str(release_dates.get("f35", "")).strip()
end_date = str(release_dates.get("f43", "")).strip()
if start_date and end_date:
    title = f"Fedora Localization Trends Summary\n(f35 → f43: {start_date} → {end_date})"
else:
    title = "Fedora Localization Trends Summary (f35 → f43)"

ax.set_ylabel("Number of Languages", fontsize=12)
ax.set_title(title, fontsize=14, fontweight='bold')
# Leave headroom for the value labels; fall back to 1 so the axis never
# collapses to a zero-height range when every count is 0.
ax.set_ylim(0, max(max(values), 1) * 1.15)
ax.grid(axis='y', alpha=0.3, linestyle='--')

plt.tight_layout()
plt.show()

# Translated words per release, languages ordered by their f43 value (largest first)
translated_words_pivot.sort_values("f43", ascending=False)

# Language progress per release (percentage of the language's own total words),
# languages ordered by their f43 value (largest first)
language_progress_pivot.sort_values("f43", ascending=False)

# Distribution progress per release (percentage of the distribution-wide total
# words), languages ordered by their f43 value (largest first)
distribution_progress_pivot.sort_values("f43", ascending=False)

# Line plot of translated words per release for the five languages with the
# largest f43 translated-word count. Releases without a value for a language
# are left out of that language's line.
top_5_langs = translated_words_pivot.nlargest(5, "f43")

fig, ax = plt.subplots(figsize=(10, 6))

for row_label, lang_row in top_5_langs.iterrows():
    # Collect (release, value) pairs once, in RELEASES order.
    observed = [
        (rel, lang_row[rel])
        for rel in RELEASES
        if rel in lang_row.index and pd.notna(lang_row[rel])
    ]
    x_releases = [rel for rel, _value in observed]
    y_words = [value for _rel, value in observed]
    ax.plot(x_releases, y_words, marker='o', label=lang_row["language"], linewidth=2)

ax.set_title("Translated Words Trend - Top 5 Languages", fontsize=14, fontweight='bold')
ax.set_xlabel("Fedora Release", fontsize=12)
ax.set_ylabel("Translated Words", fontsize=12)
ax.grid(True, alpha=0.3, linestyle='--')
ax.legend(title="Language", fontsize=10)

plt.tight_layout()
plt.show()

# Horizontal bar chart of every language's f43 translated-word count.
# Rows are sorted ascending, so the largest value is drawn last; bars are
# white with a hatch pattern instead of colors.

ranked = translated_words_pivot.sort_values(by="f43", ascending=True)
language_labels = ranked["language"].tolist()
word_counts = ranked["f43"].tolist()

fig, ax = plt.subplots(figsize=(10, 8))

# The hatch keyword applies the same pattern to every bar.
bars = ax.barh(
    language_labels,
    word_counts,
    color='white',
    edgecolor='black',
    linewidth=1.2,
    hatch='//',
)

ax.set_title("Languages Ranked by Translated Words (f43)", fontsize=14, fontweight='bold')
ax.set_xlabel("Translated Words in f43", fontsize=12)
ax.set_ylabel("Language", fontsize=12)
ax.grid(axis='x', alpha=0.3, linestyle='--')

plt.tight_layout()
plt.show()

# Text report: the trend counts computed above, followed by the three
# languages with the most translated words in f43.
rule = "=" * 60

print(rule)
print("FEDORA LOCALIZATION TRENDS SUMMARY (f35 → f43)")
print(rule)
print()
print(f"Total languages analyzed: {total_languages}")
print()
print("TREND ANALYSIS:")
for headline, explanation in (
    (
        f"  • Languages with positive translated words trend: {positive_translated_words}",
        "    (Growth in absolute translated words)",
    ),
    (
        f"  • Languages with positive language progress trend: {positive_language_progress}",
        "    (Growth in language-level completion ratio)",
    ),
    (
        f"  • Languages with positive distribution progress trend: {positive_distribution_progress}",
        "    (Growth in Fedora-wide contribution percentage)",
    ),
):
    print(headline)
    print(explanation)
    print()
print(rule)
print()
print("Top 3 languages by translated words in f43:")
top_3 = translated_words_pivot.nlargest(3, "f43")[["language", "f43"]]
for language, words in zip(top_3["language"], top_3["f43"]):
    print(f"  {language}: {int(words):,} words")
print()
print(rule)

============================================================
FEDORA LOCALIZATION TRENDS SUMMARY (f35 → f43)
============================================================

Total languages analyzed: 345

TREND ANALYSIS:
  • Languages with positive translated words trend: 174
    (Growth in absolute translated words)

  • Languages with positive language progress trend: 138
    (Growth in language-level completion ratio)

  • Languages with positive distribution progress trend: 90
    (Growth in Fedora-wide contribution percentage)

============================================================

Top 3 languages by translated words in f43:
  fr: 12,721,173 words
  de: 11,571,153 words
  es: 10,718,286 words

============================================================

	language	release	translated_words	language_total_words	distribution_total_words	language_progress	distribution_progress
0	aa	f7	4	12020	3457836	0.033278	0.000116
1	aa	f8	4	12174	3855649	0.032857	0.000104
2	aa	f16	0	7138	9698971	0.000000	0.000000
3	aa	f17	0	9286	10091855	0.000000	0.000000
4	aa	f18	0	11181	10770053	0.000000	0.000000
5	aa	f19	0	11181	11433072	0.000000	0.000000
6	aa	f20	0	11181	12425872	0.000000	0.000000
7	aa	f21	0	15825	13043139	0.000000	0.000000
8	aa	f22	0	15825	13454942	0.000000	0.000000
9	aa	f23	0	17168	13805956	0.000000	0.000000
10	aa	f24	0	23532	14211507	0.000000	0.000000
11	aa	f25	0	34546	14474492	0.000000	0.000000
12	aa	f26	0	37264	14366991	0.000000	0.000000
13	aa	f27	0	40994	14971951	0.000000	0.000000
14	aa	f28	0	41906	15305875	0.000000	0.000000

release	language	f10	f11	f12	f13	f14	f15	f16	f17	f18	...	f37	f38	f39	f40	f41	f42	f43	f7	f8	f9
0	aa	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	237.0	237.0	237.0	195.0	237.0	237.0	237.0	4.0	4.0	NaN
1	ab	NaN	NaN	0.0	0.0	0.0	0.0	3.0	3.0	3.0	...	60030.0	77127.0	102972.0	112846.0	120776.0	140583.0	143032.0	NaN	NaN	NaN
2	ace	412.0	443.0	405.0	405.0	405.0	NaN	NaN	NaN	NaN	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN
3	ach	NaN	NaN	2260.0	2424.0	2424.0	4042.0	4042.0	4042.0	4042.0	...	12334.0	10806.0	12970.0	8819.0	12757.0	17404.0	13257.0	NaN	NaN	NaN
4	ae	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN

release	language	f10	f11	f12	f13	f14	f15	f16	f17	f18	...	f38	f39	f40	f41	f42	f43	f7	f8	f9	positive_trend
0	aa	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	237.0	237.0	195.0	237.0	237.0	237.0	4.0	4.0	NaN	True
1	ab	NaN	NaN	0.0	0.0	0.0	0.0	3.0	3.0	3.0	...	77127.0	102972.0	112846.0	120776.0	140583.0	143032.0	NaN	NaN	NaN	True
2	ace	412.0	443.0	405.0	405.0	405.0	NaN	NaN	NaN	NaN	...	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
3	ach	NaN	NaN	2260.0	2424.0	2424.0	4042.0	4042.0	4042.0	4042.0	...	10806.0	12970.0	8819.0	12757.0	17404.0	13257.0	NaN	NaN	NaN	True
4	ae	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False

release	language	f10	f11	f12	f13	f14	f15	f16	f17	f18	...	f38	f39	f40	f41	f42	f43	f7	f8	f9	positive_trend
117	fr	3278761.0	3480201.0	3606670.0	3792313.0	3993891.0	4126640.0	4971948.0	5526147.0	5814230.0	...	11353172.0	11942937.0	12010992.0	12834621.0	13026996.0	12721173.0	2381499.0	2435615.0	2652440.0	True
69	de	3012530.0	3278903.0	3408173.0	3567481.0	3749889.0	3886767.0	4766742.0	5267544.0	5589272.0	...	10931303.0	11424786.0	11394370.0	11757061.0	11925119.0	11571153.0	2071483.0	2104876.0	2347071.0	True
92	es	3105932.0	3372876.0	3531746.0	3752970.0	3970812.0	4163082.0	5156558.0	5631163.0	6030101.0	...	10681656.0	10951224.0	10979677.0	11468045.0	11679167.0	10718286.0	2193043.0	2242024.0	2458943.0	True
259	ru	2137237.0	2271978.0	2395917.0	2585563.0	2771261.0	2987452.0	3721363.0	4125968.0	4378951.0	...	9284698.0	9758274.0	9696281.0	10992820.0	11052834.0	10378786.0	1525515.0	1491938.0	1699461.0	True
321	uk	1045239.0	1191644.0	1324943.0	1494693.0	1633798.0	1693106.0	2150781.0	2391505.0	2615061.0	...	7467192.0	8058446.0	8333624.0	9119700.0	9187197.0	8758076.0	731533.0	718277.0	783633.0	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
43	bqi	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
316	ty	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
35	bh	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
4	ae	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
2	ace	412.0	443.0	405.0	405.0	405.0	NaN	NaN	NaN	NaN	...	0.0	0.0	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False

release	language	f10	f11	f12	f13	f14	f15	f16	f17	f18	...	f38	f39	f40	f41	f42	f43	f7	f8	f9	positive_trend
271	sgs	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	100.000000	99.828375	100.0	100.0	100.0	100.0	NaN	NaN	NaN	False
57	cnr	NaN	NaN	NaN	NaN	NaN	NaN	100.0	100.0	100.0	...	0.360079	100.000000	100.0	100.0	100.0	100.0	NaN	NaN	NaN	True
132	guc	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	100.000000	100.000000	100.0	100.0	100.0	100.0	NaN	NaN	NaN	False
158	isv	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	100.000000	100.000000	100.0	100.0	100.0	100.0	NaN	NaN	NaN	False
84	en_IN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	100.000000	100.000000	100.0	100.0	100.0	100.0	NaN	NaN	NaN	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
65	cu	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	0.000000	0.000000	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
316	ty	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	0.000000	0.000000	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
35	bh	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	0.000000	0.000000	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
4	ae	NaN	NaN	NaN	NaN	NaN	NaN	0.0	0.0	0.0	...	0.000000	0.000000	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False
2	ace	19.153882	20.995261	19.669743	19.669743	19.669743	NaN	NaN	NaN	NaN	...	0.000000	0.000000	0.0	0.0	0.0	0.0	NaN	NaN	NaN	False

Fedora Localization Trends Analysis

Analysis Scope

Configuration and Imports

Additional Visualizations

Insights Summary

	release	release_date
0	f7	2007-05-31
1	f8	2007-11-08
2	f9	2008-05-13
3	f10	2008-11-25
4	f11	2009-06-09
5	f12	2009-11-17
6	f13	2010-05-25
7	f14	2010-11-02
8	f15	2011-05-24
9	f16	2011-11-08
10	f17	2012-05-29
11	f18	2013-01-15
12	f19	2013-07-02
13	f20	2013-12-17
14	f21	2014-12-09
15	f22	2015-05-26
16	f23	2015-11-03
17	f24	2016-06-21
18	f25	2016-11-22
19	f26	2017-07-11
20	f27	2017-11-14
21	f28	2018-05-01
22	f29	2018-10-30
23	f30	2019-04-30
24	f31	2019-10-29
25	f32	2020-04-28
26	f33	2020-10-27
27	f34	2021-04-27
28	f35	2021-11-02
29	f36	2022-05-10
30	f37	2022-11-15
31	f38	2023-04-18
32	f39	2023-11-07
33	f40	2024-04-23
34	f41	2024-10-29
35	f42	2025-04-15
36	f43	2025-10-28

release	language	f10	f11	f12	f13	f14	f15	f16	f17	f18	...	f38	f39	f40	f41	f42	f43	f7	f8	f9	positive_trend
117	fr	61.265292	60.504074	57.133991	37.539265	38.130470	37.554539	51.262634	54.758486	53.985157	...	55.962471	57.216273	57.669823	59.168176	59.602841	60.159342	68.872526	63.170040	64.647696	True
69	de	56.290633	57.004463	53.989559	35.313703	35.800934	35.371572	49.146884	52.195994	51.896421	...	53.882979	54.733913	54.709161	54.200576	54.561387	54.720815	59.906919	54.592003	57.204963	False
92	es	58.035896	58.638205	55.947104	37.149817	37.910130	37.886180	53.166032	55.799087	55.989520	...	52.652410	52.465171	52.718046	52.868199	53.436075	50.687546	63.422412	58.149069	59.931610	False
259	ru	39.935344	39.498847	37.954207	25.593914	26.457778	27.187345	38.368637	40.884139	40.658584	...	45.766473	46.749981	46.555922	50.677391	50.570393	49.082026	44.117622	38.694860	41.420819	True
321	uk	19.530815	20.716998	20.988691	14.795634	15.598194	15.408133	22.175352	23.697378	24.280855	...	36.807556	38.606437	40.013233	42.042224	42.034483	41.417571	21.155804	18.629211	19.099421	True
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
43	bqi	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	0.000000	0.000000	0.000000	0.000000	NaN	NaN	NaN	False
316	ty	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	NaN	NaN	NaN	False
35	bh	NaN	NaN	NaN	NaN	NaN	NaN	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	NaN	NaN	NaN	False
4	ae	NaN	NaN	NaN	NaN	NaN	NaN	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	NaN	NaN	NaN	False
2	ace	0.007698	0.007702	0.006416	0.004009	0.003867	NaN	NaN	NaN	NaN	...	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	NaN	NaN	NaN	False