<?xml version="1.0" encoding="utf-8"?><!DOCTYPE article  PUBLIC '-//OASIS//DTD DocBook XML V4.4//EN'  'http://www.docbook.org/xml/4.4/docbookx.dtd'><article><articleinfo><title>NKJPNGrams</title><revhistory><revision><revnumber>11</revnumber><date>2021-01-26 11:10:30</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>10</revnumber><date>2013-05-20 16:10:44</date><authorinitials>AdamPrzepiorkowski</authorinitials></revision><revision><revnumber>9</revnumber><date>2013-01-25 15:20:35</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>8</revnumber><date>2012-09-10 16:58:21</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>7</revnumber><date>2012-08-01 11:13:07</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>6</revnumber><date>2012-08-01 11:12:58</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>5</revnumber><date>2012-08-01 11:12:46</date><authorinitials>MichalLenart</authorinitials></revision><revision><revnumber>4</revnumber><date>2012-08-01 10:29:43</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>3</revnumber><date>2012-08-01 10:29:27</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision><revision><revnumber>2</revnumber><date>2012-08-01 10:26:07</date><authorinitials>MaciejOgrodniczuk</authorinitials><revremark>Renamed from 'NGrams'.</revremark></revision><revision><revnumber>1</revnumber><date>2012-07-24 16:35:04</date><authorinitials>MaciejOgrodniczuk</authorinitials></revision></revhistory></articleinfo><section><title>N-grams from the balanced subcorpus of the National Corpus of Polish</title><para>The resource is a set of N-grams extracted from the balanced subcorpus of the <ulink url="http://nkjp.pl">National Corpus of Polish</ulink> (300M tokens) for N from 1 to 5. 
Each unigram is a maximal continuous chunk of non-whitespace lower-case characters. The resource contains all unique N-grams, each followed by its number of occurrences. </para><section><title>Downloads</title><itemizedlist><listitem><para><ulink url="http://zil.ipipan.waw.pl/NKJPNGrams/NKJPNGrams?action=AttachFile&amp;do=get&amp;target=1grams.gz">1grams.gz</ulink> </para></listitem><listitem><para><ulink url="http://zil.ipipan.waw.pl/NKJPNGrams/NKJPNGrams?action=AttachFile&amp;do=get&amp;target=2grams.gz">2grams.gz</ulink> </para></listitem><listitem><para><ulink url="http://zil.ipipan.waw.pl/NKJPNGrams/NKJPNGrams?action=AttachFile&amp;do=get&amp;target=3grams.gz">3grams.gz</ulink> </para></listitem><listitem><para><ulink url="http://zil.ipipan.waw.pl/NKJPNGrams/NKJPNGrams?action=AttachFile&amp;do=get&amp;target=4grams.gz">4grams.gz</ulink> </para></listitem><listitem><para><ulink url="http://zil.ipipan.waw.pl/NKJPNGrams/NKJPNGrams?action=AttachFile&amp;do=get&amp;target=5grams.gz">5grams.gz</ulink> </para></listitem></itemizedlist></section><section><title>Licence</title><para>NKJP N-grams are made available under the CC-BY licence. </para></section></section></article>