[{"data":1,"prerenderedAt":751},["ShallowReactive",2],{"i-lucide:chevron-down":3,"i-lucide:graduation-cap":8,"i-lucide:list-ordered":10,"i-lucide:book-open":12,"i-lucide:type":14,"i-lucide:languages":16,"i-lucide:layers":18,"i-lucide:menu":20,"i-lucide:house":22,"i-lucide:chevron-right":24,"i-simple-icons:mastodon":26,"i-simple-icons:bluesky":28,"i-simple-icons:x":30,"i-simple-icons:linkedin":32,"article-\u002Fresources\u002Fwhy-the-first-1000-words-matter":35,"i-lucide:newspaper":741,"i-lucide:download":743,"i-lucide:pencil-line":745,"i-lucide:image":747,"i-lucide:file-text":749},{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":7},0,24,false,"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m6 9l6 6l6-6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":9},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M21.42 10.922a1 1 0 0 0-.019-1.838L12.83 5.18a2 2 0 0 0-1.66 0L2.6 9.08a1 1 0 0 0 0 1.832l8.57 3.908a2 2 0 0 0 1.66 0zM22 10v6\"\u002F>\u003Cpath d=\"M6 12.5V16a6 3 0 0 0 12 0v-3.5\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":11},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M11 5h10m-10 7h10m-10 7h10M4 4h1v5M4 9h2m.5 11H3.4c0-1 2.6-1.925 2.6-3.5a1.5 1.5 0 0 0-2.6-1.02\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":13},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M12 7v14m-9-3a1 1 0 0 1-1-1V4a1 1 0 0 1 1-1h5a4 4 0 0 1 4 4a4 4 0 0 1 4-4h5a1 1 0 0 1 1 1v13a1 1 0 0 1-1 1h-6a3 3 0 0 0-3 3a3 3 0 0 0-3-3z\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":15},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M12 4v16M4 7V5a1 1 0 0 1 1-1h14a1 1 0 0 1 1 1v2M9 20h6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":17},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m5 8l6 6m-7 0l6-6l2-3M2 5h12M7 2h1m14 20l-5-10l-5 10m2-4h6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":19},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M12.83 2.18a2 2 0 0 0-1.66 0L2.6 6.08a1 1 0 0 0 0 1.83l8.58 3.91a2 2 0 0 0 1.66 0l8.58-3.9a1 1 0 0 0 0-1.83z\"\u002F>\u003Cpath d=\"M2 12a1 1 0 0 0 .58.91l8.6 3.91a2 2 0 0 0 1.65 0l8.58-3.9A1 1 0 0 0 22 12\"\u002F>\u003Cpath d=\"M2 17a1 1 0 0 0 .58.91l8.6 3.91a2 2 0 0 0 1.65 0l8.58-3.9A1 1 0 0 0 22 17\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":21},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M4 5h16M4 12h16M4 19h16\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":23},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M15 21v-8a1 1 0 0 0-1-1h-4a1 1 0 0 0-1 1v8\"\u002F>\u003Cpath d=\"M3 10a2 2 0 0 1 .709-1.528l7-6a2 2 0 0 1 2.582 0l7 6A2 2 0 0 1 21 10v9a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2z\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":25},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m9 18l6-6l-6-6\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":27},"\u003Cpath fill=\"currentColor\" d=\"M23.268 5.313c-.35-2.578-2.617-4.61-5.304-5.004C17.51.242 15.792 0 11.813 0h-.03c-3.98 0-4.835.242-5.288.309C3.882.692 1.496 2.518.917 5.127C.64 6.412.61 7.837.661 9.143c.074 1.874.088 3.745.26 5.611c.118 1.24.325 2.47.62 3.68c.55 2.237 2.777 4.098 4.96 4.857c2.336.792 4.849.923 7.256.38q.398-.092.786-.213c.585-.184 1.27-.39 1.774-.753a.06.06 0 0 0 .023-.043v-1.809a.05.05 0 0 0-.02-.041a.05.05 0 0 0-.046-.01a20.3 20.3 0 0 1-4.709.545c-2.73 0-3.463-1.284-3.674-1.818a5.6 5.6 0 0 1-.319-1.433a.053.053 0 0 1 .066-.054c1.517.363 3.072.546 4.632.546c.376 0 .75 0 1.125-.01c1.57-.044 3.224-.124 4.768-.422q.059-.011.11-.024c2.435-.464 4.753-1.92 4.989-5.604c.008-.145.03-1.52.03-1.67c.002-.512.167-3.63-.024-5.545m-3.748 9.195h-2.561V8.29c0-1.309-.55-1.976-1.67-1.976c-1.23 0-1.846.79-1.846 2.35v3.403h-2.546V8.663c0-1.56-.617-2.35-1.848-2.35c-1.112 0-1.668.668-1.67 1.977v6.218H4.822V8.102q0-1.965 1.011-3.12c.696-.77 1.608-1.164 2.74-1.164c1.311 0 2.302.5 2.962 1.498l.638 1.06l.638-1.06c.66-.999 1.65-1.498 2.96-1.498c1.13 0 2.043.395 2.74 1.164q1.012 1.155 1.012 3.12z\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":29},"\u003Cpath fill=\"currentColor\" d=\"M5.202 2.857C7.954 4.922 10.913 9.11 12 11.358c1.087-2.247 4.046-6.436 6.798-8.501C20.783 1.366 24 .213 24 3.883c0 .732-.42 6.156-.667 7.037c-.856 3.061-3.978 3.842-6.755 3.37c4.854.826 6.089 3.562 3.422 6.299c-5.065 5.196-7.28-1.304-7.847-2.97c-.104-.305-.152-.448-.153-.327c0-.121-.05.022-.153.327c-.568 1.666-2.782 8.166-7.847 2.97c-2.667-2.737-1.432-5.473 3.422-6.3c-2.777.473-5.899-.308-6.755-3.369C.42 10.04 0 4.615 0 3.883c0-3.67 3.217-2.517 5.202-1.026\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":31},"\u003Cpath fill=\"currentColor\" d=\"M14.234 10.162L22.977 0h-2.072l-7.591 8.824L7.251 0H.258l9.168 13.343L.258 24H2.33l8.016-9.318L16.749 24h6.993zm-2.837 3.299l-.929-1.329L3.076 1.56h3.182l5.965 8.532l.929 1.329l7.754 11.09h-3.182z\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":33,"hidden":34},"\u003Cpath fill=\"currentColor\" d=\"M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037c-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85c3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433a2.06 2.06 0 0 1-2.063-2.065a2.064 2.064 0 1 1 2.063 2.065m1.782 13.019H3.555V9h3.564zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0z\"\u002F>",true,{"id":36,"title":37,"author":38,"authorsTake":39,"body":40,"category":711,"cefrLevel":712,"date":713,"description":714,"extension":715,"faqs":716,"heroImage":712,"intro":712,"language":712,"lastUpdated":712,"meta":729,"navigation":34,"path":730,"seo":731,"socialDescription":712,"stem":732,"tags":733,"tldr":739,"verbSlugs":712,"__hash__":740},"resources\u002Fresources\u002Fwhy-the-first-1000-words-matter.md","Why the First 1,000 Words Matter More Than the Next 4,000","Michael McGettrick","My Spanish degree did not bend the curve. My pocket notebook in Madrid did. During my Erasmus year I carried a small notebook everywhere, wrote down every word I did not know, looked them up on the bus, revised them the next day. Pre-smartphone, pre-mobile data, no app, no streak. What I did not realise at the time is that the bus route was doing my frequency ranking for me. The words that kept reappearing were, by definition, the high-frequency core. The ones I wrote down once and never saw again quietly fell out of the notebook. The system selected for the cliff and ignored the plain, because that is what real language exposure does.\n\nThe honest reason I am so impatient with the gamified apps is that I have watched friends spend three years on them and still struggle to ask for the bill in a Madrid bar. It is not their fault. They were sold breadth dressed up as progress. If you front-load the rank-1 to rank-1,000 words and attach them to the present, past, and future of the dozen most common verbs, you will out-converse a three-year streak holder inside a season. That is not a hot take. It is arithmetic against the Zipf curve.\n\nThe hill I will die on is this. Frequency-first is not a clever optimisation, it is the default any sober adult learner should start from, and the only reason it sounds contrarian is that the consumer-app business model cannot ship it without losing daily active users. The methodology and the monetisation are in tension. The methodology is right.\n",{"type":41,"value":42,"toc":696},"minimark",[43,47,56,59,62,67,70,105,116,123,127,134,137,179,182,185,189,192,202,240,246,272,282,308,311,314,318,321,327,346,353,360,363,367,384,405,408,412,419,459,466,469,473,479,482,489,492,496,503,563,566,570,573,599,609,623,626,630,633,640,647,651],[44,45,37],"h1",{"id":46},"why-the-first-1000-words-matter-more-than-the-next-4000",[48,49,50,51,55],"p",{},"The single most useful number in adult language learning is this: the ",[52,53,54],"strong",{},"1,000 most frequent words of any natural language cover roughly 80% of everyday spoken conversation",". The next 4,000 words add about 10 percentage points. The remaining 100,000 words of the lexicon, the long tail every dictionary publisher has built a business on, account for the final 10%.",[48,57,58],{},"That ratio is the structural reason a thoughtful adult learner who studies the right 1,000 words for three months will out-converse a Duolingo streak-holder of three years. It is also the reason this site exists. Kilo Lingo's entire architecture, the 100-lesson sequence per language, the Core 1,000 word lists, the tier-graded short stories, sits on the back of that one statistic.",[48,60,61],{},"This article is the evidence behind it, the structural argument for why it is true, and the editorial case for why most of the popular learning apps either ignore the number or actively work against it.",[63,64,66],"h2",{"id":65},"the-lexical-coverage-research","The lexical-coverage research",[48,68,69],{},"The 80% figure is not folklore. It comes out of corpus linguistics, the field of counting how often each word appears in transcribed natural speech and computing the cumulative coverage.",[48,71,72,75,76,80,81,84,85,88,89,92,93,96,97,100,101,104],{},[52,73,74],{},"Paul Nation's"," 1990 book ",[77,78,79],"em",{},"Teaching and Learning Vocabulary"," set the modern baseline. His later 2006 paper ",[77,82,83],{},"How Large a Vocabulary is Needed for Reading and Listening?"," put the first 1,000 word families at around ",[52,86,87],{},"78% to 85% coverage"," of spoken discourse depending on the source corpus. ",[52,90,91],{},"Norbert Schmitt's"," 2010 monograph ",[77,94,95],{},"Researching Vocabulary"," converged on a similar figure. ",[52,98,99],{},"Adolphs and Schmitt's"," 2003 study ",[77,102,103],{},"Lexical Coverage of Spoken Discourse",", using the CANCODE corpus of British English conversation, found that the first 2,000 word families covered around 95% of casual conversation, with the first 1,000 doing most of that work.",[48,106,107,108,111,112,115],{},"On the American side, ",[52,109,110],{},"Mark Davies and Dee Gardner's"," 2010 ",[77,113,114],{},"Frequency Dictionary of American English",", built on the 425-million-word Corpus of Contemporary American English, gives the same shape: the first 1,000 lemmas cover roughly 75% to 80% of spoken English depending on register.",[48,117,118,119,122],{},"Different researchers cite the number anywhere from 70% to 85%. The honest middle is ",[52,120,121],{},"around 80%",". Anyone who tells you the number is exact is selling you something; anyone who tells you it does not exist has not read the literature.",[63,124,126],{"id":125},"zipfs-law-why-the-curve-is-so-brutal","Zipf's law: why the curve is so brutal",[48,128,129,130,133],{},"The reason the first 1,000 words punch so far above their weight is a deep statistical property of language called ",[52,131,132],{},"Zipf's law",", after the linguist George Kingsley Zipf, who described it in 1949. In any large corpus of natural language, the frequency of a word is roughly inversely proportional to its rank in the frequency list. The most common word appears about twice as often as the second most common, three times as often as the third, and so on.",[48,135,136],{},"The practical effect is a brutal head-and-tail distribution:",[138,139,140,152,162,172],"ul",{},[141,142,143,144,147,148,151],"li",{},"The ",[52,145,146],{},"100 most frequent words"," cover about ",[52,149,150],{},"50% of spoken text",".",[141,153,154,155,158,159,151],{},"The next ",[52,156,157],{},"900 words"," push cumulative coverage to about ",[52,160,161],{},"80%",[141,163,154,164,167,168,171],{},[52,165,166],{},"4,000 words"," add roughly ",[52,169,170],{},"10 percentage points",", taking you to 90%.",[141,173,174,175,178],{},"The remaining ",[52,176,177],{},"100,000+ words"," of the lexicon cover the final 10%.",[48,180,181],{},"Plot the curve and it looks like a cliff that levels off into a long flat plain. The first thousand words are the cliff. Everything after is the plain. This is not a quirk of English; the same shape holds for every natural language ever measured.",[48,183,184],{},"The consequence is unforgiving. Vocabulary is the single most non-linear input in language learning. The first 1,000 words are worth, in conversational coverage terms, roughly forty times what the next 1,000 words are worth per word learned. Treating them as equivalent units in a study plan is the central methodological error of most consumer language apps.",[63,186,188],{"id":187},"the-elephant-before-eat-problem","The \"elephant before eat\" problem",[48,190,191],{},"Open the most popular language-learning apps and check what they teach you in your first week. Compare it to the actual frequency rank of those words in their target language. The mismatch is the rhetorical engine of this entire piece, so it deserves the receipts.",[48,193,194,197,198,201],{},[52,195,196],{},"Spanish frequency ranks"," (from Davies's ",[77,199,200],{},"Frequency Dictionary of Spanish",", cross-checked against OpenSubtitles):",[138,203,204,210,216,222,228,234],{},[141,205,206,209],{},[52,207,208],{},"yo"," (I): rank 16",[141,211,212,215],{},[52,213,214],{},"tú"," (you, informal): rank 47",[141,217,218,221],{},[52,219,220],{},"es"," (is): rank 5",[141,223,224,227],{},[52,225,226],{},"tengo"," (I have): rank ~80",[141,229,230,233],{},[52,231,232],{},"quiero"," (I want): rank ~150",[141,235,236,239],{},[52,237,238],{},"elefante"," (elephant): rank ~4,500",[48,241,242,245],{},[52,243,244],{},"Mandarin frequency ranks"," (from the SUBTLEX-CH spoken corpus and the Lancaster Corpus of Mandarin Chinese):",[138,247,248,254,260,266],{},[141,249,250,253],{},[52,251,252],{},"我"," wǒ (I): rank 1",[141,255,256,259],{},[52,257,258],{},"是"," shì (am\u002Fis): rank 4",[141,261,262,265],{},[52,263,264],{},"你"," nǐ (you): rank 7",[141,267,268,271],{},[52,269,270],{},"大象"," dàxiàng (elephant): rank ~3,800",[48,273,274,277,278,281],{},[52,275,276],{},"French frequency ranks"," (from Lonsdale and Le Bras's ",[77,279,280],{},"Frequency Dictionary of French","):",[138,283,284,290,296,302],{},[141,285,286,289],{},[52,287,288],{},"je"," (I): rank 6",[141,291,292,295],{},[52,293,294],{},"tu"," (you, informal): rank 38",[141,297,298,301],{},[52,299,300],{},"c'est"," (it is): rank ~20",[141,303,304,307],{},[52,305,306],{},"écureuil"," (squirrel): rank ~4,200",[48,309,310],{},"An app that has you translating \"the elephant eats the apple\" in your first hour, while you still cannot say \"I want\", \"I have\", or \"where is\", has inverted the value curve. It is teaching you words that will appear in roughly 0.001% of the sentences you will ever need, while the rank-1 to rank-100 words that show up in literally every other utterance get drip-fed across weeks in the name of lesson variety.",[48,312,313],{},"This is not an accident. It is the natural consequence of optimising for daily-task engagement rather than acquired vocabulary.",[63,315,317],{"id":316},"what-80-actually-means","What 80% actually means",[48,319,320],{},"Here is where this article has to be honest with you, because there is a flattering version of the 80% number that I will not tell.",[48,322,323,326],{},[52,324,325],{},"80% known-vocabulary is not the same as 80% comprehension."," It is much worse than that, on first encounter. If one word in five is a black box, your working memory spends so much effort guessing the unknowns that the message often collapses.",[48,328,329,330,333,334,337,338,341,342,345],{},"The comprehensible-input research from ",[52,331,332],{},"Hsueh-Chao Hu and Paul Nation's"," 2000 study ",[77,335,336],{},"Unknown Vocabulary Density and Reading Comprehension"," put rough working numbers on this. For unaided reading comprehension of a text, you need roughly ",[52,339,340],{},"95% known-vocabulary coverage"," to follow the gist comfortably. For full comprehension you need closer to ",[52,343,344],{},"98%",". Below 90% known coverage, most adult readers report that the text feels impenetrable.",[48,347,348,349,352],{},"So the 1,000-word foundation is not a finish line. It will not, on its own, let you understand a Telemundo news bulletin or a Le Monde editorial. It is something more useful than that. It is the ",[52,350,351],{},"foundation that makes the next 4,000 words learnable from context",", because once 80% of the words on the page are familiar, the unknown 20% can mostly be inferred from surrounding context, from cognates, from grammatical function, and from repetition.",[48,354,355,356,359],{},"This is the actual mechanism by which adult learners go from A2 to B2 without explicitly drilling every word. They are not learning the next 4,000 words by flashcard. They are reading and listening to comprehensible input, the unknown words are showing up four or five times in plausible contexts, and the brain is doing what it evolved to do. The 1,000-word foundation is what makes that input ",[77,357,358],{},"comprehensible"," in the first place.",[48,361,362],{},"Sell yourself the 1,000 words as the on-ramp, not the destination. It is genuinely both, but the on-ramp framing leads to better study habits.",[63,364,366],{"id":365},"the-fsi-grounding","The FSI grounding",[48,368,369,370,373,374,377,378,383],{},"The most overlooked piece of evidence for the core-vocabulary-first approach is the curriculum design of the ",[52,371,372],{},"US Foreign Service Institute (FSI)",". The FSI teaches Spanish to Professional Working Proficiency (CEFR B2 to C1) in ",[52,375,376],{},"24 weeks"," of intensive classroom instruction, roughly 600 to 750 hours. It teaches Mandarin to the same level in 88 weeks, roughly 2,200 hours. The full FSI categorisation lives in the ",[379,380,382],"a",{"href":381},"\u002Ftools\u002Ffsi-time-to-fluency","FSI time-to-fluency calculator"," on this site.",[48,385,386,387,390,391,394,395,398,399,398,402,151],{},"What is buried in the published syllabi is the ordering. The ",[52,388,389],{},"first six weeks"," of FSI Spanish, before any extensive reading, before any free-form conversation, before any culture modules, are dominated by ",[52,392,393],{},"structural patterns and the core 500 to 1,000 vocabulary",". Pronouns, the present tense of the most common verbs, the canonical question words, the high-frequency function words. The FSI does not start with elephants and squirrels. It starts with ",[52,396,397],{},"ser, estar, tener, ir, querer, poder",", with ",[52,400,401],{},"qué, dónde, cuándo, cómo, por qué",[52,403,404],{},"yo, tú, él, nosotros",[48,406,407],{},"The reason the FSI can hit professional fluency in 600 hours and Duolingo cannot get the median user to A2 in 600 hours is not that FSI students are smarter or harder working. It is the curriculum design. The FSI front-loads the cliff. The apps spread it across a plain.",[63,409,411],{"id":410},"the-compounding-argument","The compounding argument",[48,413,414,415,418],{},"Once you have 1,000 words plus the structural backbone of the language (the present, past, and future of the dozen most common verbs, the noun-adjective agreement system, the basic question patterns), an enormous unlock happens. The activities that ",[52,416,417],{},"were not productive at A1 suddenly become productive at A2",":",[138,420,421,427,447,453],{},[141,422,423,426],{},[52,424,425],{},"Graded readers"," at 95% known-vocabulary coverage become readable for pleasure rather than as a chore.",[141,428,429,432,433,436,437,436,440,436,443,446],{},[52,430,431],{},"Comprehensible-input podcasts"," become trackable. ",[77,434,435],{},"Dreaming Spanish",", ",[77,438,439],{},"InnerFrench",[77,441,442],{},"Du Chinese",[77,444,445],{},"News in Slow Spanish"," are all built around the assumption that the listener has roughly the first 1,000 to 2,000 words. They are unusable below that threshold and high-leverage above it.",[141,448,449,452],{},[52,450,451],{},"Language exchange and tutor conversations"," stop being a humiliating ten minutes of \"lo siento, no entiendo\" and become an actual interaction, slow and clumsy but real.",[141,454,455,458],{},[52,456,457],{},"Subtitled native TV"," stops being incomprehensible audio with text that scrolls too fast and becomes a learning environment where the reading reinforces the listening.",[48,460,461,462,465],{},"This is where ",[52,463,464],{},"Stephen Krashen's"," input hypothesis earns its place in the methodology. The central claim, that language is acquired through comprehensible input slightly above the learner's current level, is the cleanest explanation for why the 1,000-word threshold matters. Below it, input is not comprehensible. Above it, almost any input you choose to consume contributes. The 1,000 words is what turns the rest of the language into a self-teaching environment.",[48,467,468],{},"Hours spent on the first 1,000 words are an investment in everything that comes after. Hours spent on the 4,000th word, before the 100th word is solid, are not.",[63,470,472],{"id":471},"why-most-apps-avoid-this-approach","Why most apps avoid this approach",[48,474,475,476,151],{},"The structural critique of the gamified-app model is not that the apps are stupid. It is that their ",[52,477,478],{},"incentives are misaligned with vocabulary acquisition",[48,480,481],{},"The strongest version of the apps' case, which I will state first because honest criticism requires it: the streak loop genuinely does increase study days. A Duolingo user with a 200-day streak has studied on 200 days. Without the streak mechanic, they would not have studied on most of them. Habit formation is real, and the apps are very good at it.",[48,483,484,485,488],{},"But study days do not equal acquired vocabulary. The streak loop optimises for daily-task completion, which means the lesson has to feel novel enough to be engaging today and easy enough to be completable in five minutes. Teaching the same 1,000 words slowly enough for an adult to actually retain them, with the repetition spacing that the spaced-repetition literature requires, takes weeks of feeling like you are not making \"progress\" by the app's metric. So the apps ",[52,486,487],{},"inflate breadth at the expense of depth",". New lesson, new theme, new fifteen words, including \"elephant\" and \"umbrella\" and \"guitar\", because new content generates a sense of forward motion.",[48,490,491],{},"The result is a lexicon that is wide and shallow. The user can recognise 1,500 words across forty thematic categories but cannot string together five fluent sentences, because the high-frequency function words and core verbs never got enough exposure to become automatic. The metrics the apps report (XP, streak days, league standing) look great; the transfer to actual conversation does not happen. This is what happens when the business model requires daily active users and the methodology requires patient depth. The two are in tension, and the business model wins.",[63,493,495],{"id":494},"what-kilo-lingo-does-instead","What Kilo Lingo does instead",[48,497,498,499,502],{},"Kilo Lingo is built top to bottom on the inverse of that tradeoff. The site sequences the ",[52,500,501],{},"first 1,000 words by frequency rank",", teaches them in deliberately repetitive lessons, and only widens out to thematic vocabulary once the core is solid. Concretely:",[138,504,505,524,534,552,555],{},[141,506,143,507,510,511,436,515,436,519,523],{},[52,508,509],{},"Core 1,000 word lists"," (",[379,512,514],{"href":513},"\u002Fspanish\u002Fwords\u002Fcore-1000","Spanish",[379,516,518],{"href":517},"\u002Ffrench\u002Fwords\u002Fcore-1000","French",[379,520,522],{"href":521},"\u002Fmandarin\u002Fwords\u002Fcore-1000","Mandarin",") are the spine. Every word has a frequency rank, a part-of-speech tag, and a worked example, derived from the OpenSubtitles spoken-language corpus.",[141,525,143,526,529,530,533],{},[52,527,528],{},"100-lesson sequence per language"," puts the Core 1,000 in ",[52,531,532],{},"lessons 1 to 20",", 50 words per lesson. Lessons 21 to 60 cover ranks 1,001 to 3,000; lessons 61 to 100 push into the 3,001 to 5,000 band.",[141,535,143,536,510,539,436,543,436,547,551],{},[52,537,538],{},"tier-checked short stories",[379,540,542],{"href":541},"\u002Fspanish\u002Fstories","Spanish stories",[379,544,546],{"href":545},"\u002Ffrench\u002Fstories","French stories",[379,548,550],{"href":549},"\u002Fmandarin\u002Fstories","Mandarin stories",") are written with strict vocabulary tier limits, so a Tier-1 story uses only Core 1,000 words. They are the comprehensible-input layer that compounds with the lesson layer.",[141,553,554],{},"Per-lesson quizzes test recall and recognition, not streak fillers.",[141,556,557,558,562],{},"The site-wide ",[379,559,561],{"href":560},"\u002Ftools\u002Fflashcards","spaced-repetition flashcard tool"," handles the long-term retention layer.",[48,564,565],{},"The cross-links are the point. This article funnels into the parts of the site that actually do the teaching.",[63,567,569],{"id":568},"honest-caveats","Honest caveats",[48,571,572],{},"The frequency-first approach has real limitations and pretending otherwise would undermine the rest of the argument.",[48,574,575,578,579,582,583,436,586,436,589,436,592,436,595,598],{},[52,576,577],{},"Corpora have biases."," The OpenSubtitles corpus that Kilo Lingo's frequency rankings derive from is conversational, which is the right register for spoken fluency, but it skews ",[52,580,581],{},"dramatic and film-dialogue",". Words like ",[77,584,585],{},"kill",[77,587,588],{},"love",[77,590,591],{},"fight",[77,593,594],{},"die",[77,596,597],{},"gun"," are over-represented relative to genuinely neutral everyday speech. A learner who studies the OpenSubtitles Top 1,000 will end up with a slight melodrama bias that they will need to correct through varied input later. Other corpora (Davies's COCA, the British National Corpus, the Lancaster Corpus of Mandarin Chinese) have their own biases, mostly toward written and news language.",[48,600,601,604,605,608],{},[52,602,603],{},"Native lexicons are domain-specific."," Business Mandarin, academic French, hospitality Spanish, and travel Spanish have different \"high-frequency tail\" words. A Core 1,000 list gets you the universal conversational layer, but the working vocabulary of a specific domain often sits in the 1,000-to-5,000 frequency band of the general corpus, and is much more common within the domain than its general rank suggests. Kilo Lingo's ",[52,606,607],{},"per-scenario phrase pages"," (restaurant Spanish, airport French, business Mandarin, and the rest of the scenario library) are the structural response to that, sitting alongside the frequency-first lessons rather than replacing them.",[48,610,611,614,615,618,619,622],{},[52,612,613],{},"Frequency does not equal usefulness for your specific life."," If you are learning Spanish for paediatric nursing, the names of common childhood illnesses are higher-leverage for you than their general-corpus rank suggests. Frequency-first is the best ",[52,616,617],{},"default"," when you do not have a specific domain, and the best ",[52,620,621],{},"foundation"," even when you do, because the function words and core verbs are universal across domains. But your top-up beyond the 1,000 should be tailored to your purpose.",[48,624,625],{},"The 80% number is a statistical regularity, not a guarantee about your particular conversation tomorrow.",[63,627,629],{"id":628},"the-thesis-restated","The thesis, restated",[48,631,632],{},"The first 1,000 words of any natural language plus the core grammar that lets you combine them is the only foundation that compounds. It is the difference between an adult learner who reaches functional conversational A2 to B1 in three to six months of focused study, and an app user who is still on the streak after three years and cannot order coffee without rehearsing.",[48,634,635,636,639],{},"Everything after the 1,000-word mark is a long compounding tail that the input itself will mostly teach you, provided you have the foundation that makes the input comprehensible. Everything ",[52,637,638],{},"before"," the 1,000-word mark is the only place a study plan should start.",[48,641,642,643,646],{},"That is the site's editorial position, the architectural rationale behind every page in the Core 1,000 sequence, and the answer to the question on the home page. The vocabulary that matters is not all vocabulary equally. It is the ",[52,644,645],{},"first thousand",", in frequency order, learned to automaticity, with grammar attached. The rest will come, in its own time, because you will have built the only structure on which it can land.",[63,648,650],{"id":649},"cross-references","Cross-references",[138,652,653,666,671,676,684],{},[141,654,143,655,436,658,661,662,665],{},[379,656,657],{"href":513},"Spanish Core 1,000 list",[379,659,660],{"href":517},"French Core 1,000 list",", and ",[379,663,664],{"href":521},"Mandarin Core 1,000 list"," are the working application of this article's argument.",[141,667,143,668,670],{},[379,669,382],{"href":381}," puts real hour targets on the methodology.",[141,672,143,673,675],{},[379,674,561],{"href":560}," is the retention layer for the Core 1,000.",[141,677,678,679,683],{},"The companion piece ",[379,680,682],{"href":681},"\u002Fresources\u002Fhow-polyglots-learn-languages","How Polyglots Actually Learn Languages"," covers the broader methodology toolkit this article sits inside.",[141,685,143,686,436,689,661,692,695],{},[379,687,514],{"href":688},"\u002Fspanish",[379,690,518],{"href":691},"\u002Ffrench",[379,693,522],{"href":694},"\u002Fmandarin"," pillar pages are the entry points into the per-language lesson sequences.",{"title":697,"searchDepth":698,"depth":698,"links":699},"",2,[700,701,702,703,704,705,706,707,708,709,710],{"id":65,"depth":698,"text":66},{"id":125,"depth":698,"text":126},{"id":187,"depth":698,"text":188},{"id":316,"depth":698,"text":317},{"id":365,"depth":698,"text":366},{"id":410,"depth":698,"text":411},{"id":471,"depth":698,"text":472},{"id":494,"depth":698,"text":495},{"id":568,"depth":698,"text":569},{"id":628,"depth":698,"text":629},{"id":649,"depth":698,"text":650},"Methodology",null,"2026-06-08T00:00:00+00:00","The first 1,000 most frequent words of a language cover roughly 80% of everyday speech. Here is the evidence, and why it should reorder your study plan.","md",[717,720,723,726],{"q":718,"a":719},"How many words do you need to have a basic conversation in Spanish, French, or Mandarin?","Roughly the first 1,000 most frequent words plus the present, past, and future of the dozen most common verbs. Corpus research from Paul Nation, Norbert Schmitt, and Mark Davies puts the first 1,000 word families at around 78% to 85% coverage of spoken discourse, which is enough for a slow but real conversation when paired with core grammar.",{"q":721,"a":722},"What is Zipf's law and why does it matter for language learning?","Zipf's law, described by George Kingsley Zipf in 1949, says that in any large corpus the frequency of a word is roughly inversely proportional to its rank. In practical terms the 100 most frequent words cover around 50% of spoken text, the next 900 push you to roughly 80%, and the next 4,000 only add about ten percentage points. The first thousand words are the cliff, everything after is the plain, and ignoring that shape is the central error of most consumer apps.",{"q":724,"a":725},"Is the 1,000-word threshold enough to understand Spanish TV or French news?","No, and anyone who tells you otherwise is selling something. Hsueh-Chao Hu and Paul Nation's 2000 study put unaided reading comprehension at around 95% known-vocabulary coverage and full comprehension closer to 98%. The 1,000-word foundation is not the finish line, it is the on-ramp that makes the next 4,000 words learnable from comprehensible input rather than from brute flashcards.",{"q":727,"a":728},"Why do apps like Duolingo teach words like elephant and umbrella so early?","Because their business model optimises for daily-task engagement rather than acquired vocabulary. Teaching the same 1,000 high-frequency words slowly enough for adults to retain them does not feel like daily progress, so the apps inflate breadth with thematic novelty. Elephant sits around rank 4,500 in Spanish, while tengo (I have) and quiero (I want) sit inside the first 150, yet the elephant gets the lesson and the core verbs get drip-fed across weeks.",{},"\u002Fresources\u002Fwhy-the-first-1000-words-matter",{"title":37,"description":714},"resources\u002Fwhy-the-first-1000-words-matter",[734,735,736,737,738,132],"vocabulary","core 1000","lexical coverage","language learning methodology","FSI","The first 1,000 most frequent words of any natural language cover roughly 80% of everyday speech, the next 4,000 add only ten more percentage points, and any study plan that does not front-load that cliff is quietly working against the learner. Frequency-first is the only foundation that compounds.","DtBKDXAg1vuXGk3QpLPCNuaJXjNJvVksWDxovqTEsvI",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":742},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M15 18h-5m8-4h-8m-6 8h16a2 2 0 0 0 2-2V4a2 2 0 0 0-2-2H8a2 2 0 0 0-2 2v16a2 2 0 0 1-4 0v-9a2 2 0 0 1 2-2h2\"\u002F>\u003Crect width=\"8\" height=\"4\" x=\"10\" y=\"6\" rx=\"1\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":744},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M12 15V3m9 12v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4\"\u002F>\u003Cpath d=\"m7 10l5 5l5-5\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":746},"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"M13 21h8M15 5l4 4m2.174-2.188a1 1 0 0 0-3.986-3.987L3.842 16.174a2 2 0 0 0-.5.83l-1.321 4.352a.5.5 0 0 0 .623.622l4.353-1.32a2 2 0 0 0 .83-.497z\"\u002F>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":748},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Crect width=\"18\" height=\"18\" x=\"3\" y=\"3\" rx=\"2\" ry=\"2\"\u002F>\u003Ccircle cx=\"9\" cy=\"9\" r=\"2\"\u002F>\u003Cpath d=\"m21 15l-3.086-3.086a2 2 0 0 0-2.828 0L6 21\"\u002F>\u003C\u002Fg>",{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":750},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M6 22a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h8a2.4 2.4 0 0 1 1.704.706l3.588 3.588A2.4 2.4 0 0 1 20 8v12a2 2 0 0 1-2 2z\"\u002F>\u003Cpath d=\"M14 2v5a1 1 0 0 0 1 1h5M10 9H8m8 4H8m8 4H8\"\u002F>\u003C\u002Fg>",1781519468036]