<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE rules SYSTEM "lxtransduce.dtd">

<!-- Author: Adam Przepiórkowski, Łukasz Degórski -->
<!-- (c) 2006, 2007 -->

<rules type="xml" apply="main">
 

 <!-- 
 ***************************************************************************************
 Special token-level cases
 **************************************************************************************
 -->

 <rule name="SpecialNoun">
  <!-- Hard-coded noun-like terms, recognised by token text alone:
       the single token "chat", or the token pair "e" + "-learning". -->
  <first>
   <query match="tok[. = 'chat']"/>
   <seq>
    <query match="tok[. = 'e']"/>
    <query match="tok[. = '-learning']"/>
   </seq>
  </first>
 </rule>

 <!-- AP: some non-sentence-ending abbreviations: -->
 <rule name="InternalAbbrev">
  <!-- An abbreviation token "tzw" or "np", optionally with an attached
       opening bracket ("(tzw", "(np"), followed by a "." token.
       Referenced from tok_no_interp. -->
  <seq>
   <query match="tok[. ~ '^\(?(tzw|np)$']"/>
   <query match="tok[. = '.']"/>
  </seq>
 </rule>

 <!-- AP: catches "to jest", "tj." (Polish for "that is", also with a preceding comma): -->
 <rule name="Tj">
  <!-- Optional comma, one of the three variants below, optional comma. -->
  <seq>
   <query mult="?" match="tok[. = ',']"/>
   <first>
    <!-- spelled out: "to" "jest" -->
    <seq>
     <query match="tok[. = 'to']"/>
     <query match="tok[. = 'jest']"/>
    </seq>
    <!-- abbreviated: "tj" or "(tj", then "." -->
    <seq>
     <query match="tok[. ~ '^\(?tj$']"/>
     <query match="tok[. = '.']"/>
    </seq>
    <!-- dotted spelling split over four tokens: "t" "." ".j" ".".
         NOTE(review): the third token is literally ".j"; presumably this
         mirrors how the tokenizer splits "t.j." (confirm). -->
    <seq>
     <query match="tok[. = 't']"/>
     <query match="tok[. = '.']"/>
     <query match="tok[. = '.j']"/>
     <query match="tok[. = '.']"/>
    </seq>
   </first>
   <query mult="?" match="tok[. = ',']"/>
  </seq>
 </rule>

 <!--  AP: (impersonal form of) Polish for "be considered". -->
 <rule name="Uwazac_sie">
  <!-- The token "uważa" or "przyjmuje" followed by the token "się"
       (roughly "is considered" / "is assumed"). Matching is on exact
       token text, so capitalised forms are not covered. -->
  <seq>
   <query match="tok[. ~ '^(uważa|przyjmuje)$']"/>
   <query match="tok[. = 'się']"/>
  </seq>
 </rule>

 <!--  AP: Verb which may be used for defining terms. -->
 <rule name="Verb">
  <!-- A single token whose lemma (@base) is one of the verbs listed below.
       English glosses are approximate. -->
  <first>
   <query match="tok[@base = 'zajmować']"/>   <!-- deal with, occupy -->
   <query match="tok[@base = 'oznaczać']"/>   <!-- mean, denote -->
   <query match="tok[@base = 'nazywać']"/>    <!-- call, name -->
   <query match="tok[@base = 'składać']"/>    <!-- compose, consist -->
   <query match="tok[@base = 'określać']"/>   <!-- define, determine -->
   <query match="tok[@base = 'obejmować']"/>  <!-- comprise, cover -->
   <query match="tok[@base = 'pozwalać']"/>   <!-- allow -->
   <query match="tok[@base = 'umożliwiać']"/> <!-- enable -->
   <query match="tok[@base = 'polegać']"/>    <!-- consist in, rely on -->
   <!-- Disabled candidate: tok[@base = 'łączyć'] (join, combine).
        Add further verbs as extra query elements here. -->
  </first>
 </rule>

 <!-- catches copulae -->
 <rule name="copula">
  <!-- Optional punctuation token (ctag interp), then an optional form of
       "być" tagged fin or praet, then the obligatory token "to".
       So both "jest to" and bare "to" match; a bare "jest" does not.
       A stricter, currently disabled test for the verb was
       tok[. ~ '^([Jj]est|[Ss]ą)$']. -->
  <seq>
   <query mult="?" match="tok[@ctag = 'interp']"/>
   <query mult="?" match="tok[@base = 'być' and @ctag ~ '^(fin|praet)$']"/>
   <query match="tok[. = 'to']"/>
  </seq>
 </rule>
 
 <!-- catches a bare form of "być" tagged fin or praet (e.g. jest/są), with no following "to" -->
 <rule name="jest-sa">
  <!-- One token with lemma "być" and ctag fin or praet.
       The test is on lemma and tag, not on the surface form; a stricter,
       currently disabled variant was tok[. ~ '^([Jj]est|[Ss]ą)$']. -->
  <query match="tok[@base = 'być' and @ctag ~ '^(fin|praet)$']"/>
 </rule>


 <!-- 
 ***************************************************************************************
 Basic and POS-based rules that reference only values and attributes (and other basic rules)
 **************************************************************************************
 -->

 <rule name="Unknown">
  <!-- A token the tagger did not recognise: ctag is "ign" or there is no
       ctag attribute at all. Tokens whose text is exactly one character
       are excluded first (e.g. "Ó" or an apostrophe). -->
  <seq>
   <!-- guard: reject single-character tokens -->
   <not>
    <query match="tok[. ~ '^.$']"/>
   </not>
   <first>
    <query match="tok[@ctag = 'ign']"/>
    <query match="tok[not(@ctag)]"/>
   </first>
  </seq>
 </rule>

 <rule name="Noun">
  <!-- One noun-like item that is not the token "to". Alternatives, tried
       in order:
        1. a subst token whose @msd contains $case,
        2. a ger token whose @msd contains $case,
        3. a SpecialNoun (matched on token text only, so no case is passed),
        4. an Unknown token.
       Parameter $case: substring looked up in @msd, e.g. ':nom:';
       callers pass '' when any case is acceptable. -->
  <seq>
   <not>
    <query match="tok[. = 'to']"/>
   </not>
   <first>
    <query match="tok[@ctag = 'subst' and @msd[contains(.,$case)]]"/>
    <query match="tok[@ctag = 'ger' and @msd[contains(.,$case)]]"/>
    <ref name="SpecialNoun"/>
    <ref name="Unknown"/>
   </first>
  </seq>
 </rule>
 
 <rule name="Noun_known">
  <!-- Like Noun but restricted to tagged tokens: a subst or ger token
       whose @msd contains $case, and which is not the token "to".
       No SpecialNoun or Unknown fallback. -->
  <seq>
   <not>
    <query match="tok[. = 'to']"/>
   </not>
   <query match="tok[@ctag ~ '^(subst|ger)$' and @msd[contains(.,$case)]]"/>
  </seq>
 </rule>
 
 <rule name="Infinitive">
  <!-- One token tagged as an infinitive (ctag inf). -->
  <query match="tok[@ctag = 'inf']"/>
 </rule>
 
 <rule name="Adv">
  <!-- An adverbial: either one token tagged adv, or the fixed two-token
       phrase "po" "prostu" ("simply"). The tag test is tried first. -->
  <first>
   <query match="tok[@ctag = 'adv']"/>
   <seq>
    <query match="tok[. = 'po']"/>
    <query match="tok[. = 'prostu']"/>
   </seq>
  </first>
 </rule>

 <rule name="Adj">
  <!-- One token tagged adj whose @msd contains $case. -->
  <query match="tok[@ctag = 'adj' and @msd[contains(.,$case)]]"/>
 </rule>

 <rule name="AdjParticiple">
  <!-- One adjectival participle: ctag ppas or pact, with @msd
       containing $case. -->
  <query match="tok[@ctag ~ '^(ppas|pact)$' and @msd[contains(.,$case)]]"/>
 </rule>

 <rule name="AdjP1">
  <!-- Simple adjective phrase in case $case:
       any number of Adv, then (provided the next token is not "to")
       either an Adj, or an AdjParticiple followed by a Noun of any case. -->
  <seq>
   <ref name="Adv" mult="*"/>
   <!-- guard: the head must not be the token "to" -->
   <not>
    <query match="tok[. = 'to']"/>
   </not>
   <first>
    <ref name="Adj">
     <with-param name="case" value="$case"/>
    </ref>
    <!-- participle plus a following noun; '' puts no constraint on the
         noun's case -->
    <seq>
     <ref name="AdjParticiple">
      <with-param name="case" value="$case"/>
     </ref>
     <ref name="Noun">
      <with-param name="case" value="''"/>
     </ref>
    </seq>
   </first>
  </seq>
 </rule>

 <rule name="AdjP1Coord">
  <!-- Coordinated continuation of an adjective phrase: a conjunction
       token (ctag conj) followed by an AdjP1 in case $case or by an
       Unknown token. -->
  <seq>
   <query match="tok[@ctag = 'conj']"/>
   <first>
    <ref name="AdjP1">
     <with-param name="case" value="$case"/>
    </ref>
    <ref name="Unknown"/>
   </first>
  </seq>
 </rule>

 <rule name="AdjP2">
  <!-- Full adjective phrase in case $case: one or more AdjP1, then any
       number of coordinated continuations (AdjP1Coord). -->
  <seq>
   <ref name="AdjP1" mult="+">
    <with-param name="case" value="$case"/>
   </ref>
   <ref name="AdjP1Coord" mult="*">
    <with-param name="case" value="$case"/>
   </ref>
  </seq>
 </rule>


 
 <rule name="NP1_known">
  <!-- Simple noun phrase in case $case whose head must be a tagged noun
       (Noun_known): optional opening quote token, any number of AdjP2,
       the head, at most one trailing AdjP2, optional closing quote token.
       The opening-quote regex is unanchored, so it accepts any token that
       contains one of the two opening quote characters. -->
  <seq>
   <query mult="?" match="tok[. ~ '(„|“)']"/>
   <ref name="AdjP2" mult="*">
    <with-param name="case" value="$case"/>
   </ref>
   <ref name="Noun_known">
    <with-param name="case" value="$case"/>
   </ref>
   <ref name="AdjP2" mult="?">
    <with-param name="case" value="$case"/>
   </ref>
   <query mult="?" match="tok[. = '”']"/>
  </seq>
 </rule>

 <rule name="NP1">
  <!-- Simple noun phrase in case $case: optional opening quote token, any
       number of AdjP2, a head (two variants below), optional closing
       quote token. The opening-quote regex is unanchored, so it accepts
       any token that contains one of the two opening quote characters. -->
  <seq>
   <query mult="?" match="tok[. ~ '(„|“)']"/>
   <ref name="AdjP2" mult="*">
    <with-param name="case" value="$case"/>
   </ref>
   <first>
    <!-- regular head: Noun in $case, with at most one trailing AdjP2 -->
    <seq>
     <ref name="Noun">
      <with-param name="case" value="$case"/>
     </ref>
     <ref name="AdjP2" mult="?">
      <with-param name="case" value="$case"/>
     </ref>
    </seq>
    <!-- workaround for tagger errors (e.g. sieć/acc semantyczna/nom):
         the noun may be in any case, but then a trailing AdjP2 in $case
         is obligatory -->
    <seq>
     <ref name="Noun">
      <with-param name="case" value="''"/>
     </ref>
     <ref name="AdjP2">
      <with-param name="case" value="$case"/>
     </ref>
    </seq>
   </first>
   <query mult="?" match="tok[. = '”']"/>
  </seq>
 </rule>

 <rule name="NP1Coord">
  <!-- Coordinated continuation of a noun phrase: a conjunction token
       (ctag conj) followed by an NP1 in case $case. -->
  <seq>
   <query match="tok[@ctag = 'conj']"/>
   <ref name="NP1">
    <with-param name="case" value="$case"/>
   </ref>
  </seq>
 </rule>

 <rule name="NP2">
  <!-- Full noun phrase in case $case: one or more NP1, then any number
       of coordinated continuations (NP1Coord). -->
  <seq>
   <ref name="NP1" mult="+">
    <with-param name="case" value="$case"/>
   </ref>
   <ref name="NP1Coord" mult="*">
    <with-param name="case" value="$case"/>
   </ref>
  </seq>
 </rule>

 <rule name="NP2_known">
  <!-- Like NP2, but the leading phrases are NP1_known (tagged noun heads
       only): one or more NP1_known, then any number of NP1Coord.
       Note that the coordinated continuations use the ordinary NP1Coord,
       so they may still contain Unknown or SpecialNoun heads. -->
  <seq>
   <ref name="NP1_known" mult="+">
    <with-param name="case" value="$case"/>
   </ref>
   <ref name="NP1Coord" mult="*">
    <with-param name="case" value="$case"/>
   </ref>
  </seq>
 </rule>

 <!--  AP: dealing with tagger errors: these things might be nominative, --> 
 <!--  after all: -->
 <rule name="Noun_possibly_nom">
  <!-- A subst token whose tag is one of three readings that the authors
       treat as possibly mis-tagged nominatives. Each line gives the
       original example word. -->
  <first>
   <!-- sg:gen:n, e.g. "słowa" -->
   <query match="tok[@ctag = 'subst' and @msd[contains(.,'sg:gen:n')]]"/>
   <!-- pl:acc:f, e.g. "frazy" -->
   <query match="tok[@ctag = 'subst' and @msd[contains(.,'pl:acc:f')]]"/>
   <!-- sg:acc:f, only when the token ends in c, ć or ń, e.g. "zawartość" -->
   <query match="tok[@ctag = 'subst' and @msd[contains(.,'sg:acc:f')] and . ~ '[cćń]$']"/>
  </first>
 </rule>



 <rule name="PP">
  <!-- Prepositional phrase: a preposition token (ctag prep), an NP1 in
       any case, and optionally one further NP1 in the genitive. -->
  <seq>
   <query match="tok[@ctag = 'prep']"/>
   <ref name="NP1">
    <with-param name="case" value="''"/>
   </ref>
   <ref name="NP1" mult="?">
    <with-param name="case" value="':gen:'"/>
   </ref>
  </seq>
 </rule>
 

 <!--
 ************************
 Intermediate-level rules
 ************************    
 -->

 <!-- catches an InternalAbbrev (abbreviation plus its full stop) or any single tok whose text contains a character other than ! or ? -->
 <rule name="tok_no_interp">
  <!-- One step of a "run": an InternalAbbrev (tried first, so the
       abbreviation and its "." are taken together), otherwise any single
       token whose text contains at least one character other than "!"
       or "?". The regex is unanchored. -->
  <first>
   <ref name="InternalAbbrev"/>
   <query match="tok[. ~ '[^!\?]']"/>
  </first>
 </rule>
 
 <!-- catches a sequence of non-punctuation elements (tok_no_interp), up to a full stop -->
 <rule name="run_to_full_stop">
  <!-- Repeats tok_no_interp until a "." token is reached.
       NOTE(review): the terminating "." is presumably not part of the
       match (callers of the sibling bracket rule match ")" explicitly
       afterwards); confirm against the lxtransduce manual. -->
  <repeat-until name="tok_no_interp">
   <query match="tok[. = '.']"/>
  </repeat-until>
 </rule>

 <!-- catches any sequence ending with ?  
 -->
 <rule name="anything-until-questionmark">
  <!-- Repeats tok_no_interp until a "?" token is reached. -->
  <repeat-until name="tok_no_interp">
   <query match="tok[. = '?']"/>
  </repeat-until>
 </rule>

 <!--  <rule name="anything-until-jest-sa"> -->
 <!--   <seq> -->
 <!--    <repeat-until name="tok_no_interp"> -->
 <!--     <ref name="jest-sa"/> -->
 <!--    </repeat-until>       -->
 <!--   </seq> -->
 <!--  </rule> -->

 <rule name="anything-until-rbracket">
  <!-- Repeats tok_no_interp until a ")" token is reached. -->
  <repeat-until name="tok_no_interp">
   <query match="tok[. = ')']"/>
  </repeat-until>
 </rule>

 <rule name="Verb-until-full-stop">
  <!-- A defining verb (rule Verb) followed by the rest of the sentence
       up to a full stop (rule run_to_full_stop). -->
  <seq>
   <ref name="Verb"/>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>

 <rule name="Parenthetical">
  <!-- An inserted remark, in one of two shapes:
        1. "(" ... ")" with no ")" token in between;
        2. "," + ("taki" lemma + "jak", or "inaczej") ... "," with no ","
           token in between ("such as ..." / "otherwise ..."). -->
  <first>
   <!-- bracketed remark -->
   <seq>
    <query match="tok[. = '(']"/>
    <query mult="*" match="tok[not(. = ')')]"/>
    <query match="tok[. = ')']"/>
   </seq>
   <!-- comma-delimited remark introduced by a fixed phrase -->
   <seq>
    <query match="tok[. = ',']"/>
    <first>
     <seq>
      <query match="tok[@base = 'taki']"/>
      <query match="tok[. = 'jak']"/>
     </seq>
     <query match="tok[. = 'inaczej']"/>
    </first>
    <query mult="*" match="tok[not(. = ',')]"/>
    <query match="tok[. = ',']"/>
   </seq>
  </first>
 </rule>
 
 <rule name="maybe_term">
  <!-- Candidate term with no case restriction.
       Head: an NP2 in any case, or one or more Unknown tokens.
       Tail: any number of NP2 (any case), PP, Parenthetical or Unknown,
       tried in that order at each step. -->
  <seq>
   <!-- head -->
   <first>
    <ref name="NP2">
     <with-param name="case" value="''"/>
    </ref>
    <ref name="Unknown" mult="+"/>
   </first>
   <!-- tail -->
   <first mult="*">
    <ref name="NP2">
     <with-param name="case" value="''"/>
    </ref>
    <ref name="PP"/>
    <ref name="Parenthetical"/>
    <ref name="Unknown"/>
   </first>
  </seq>
 </rule>

 <rule name="nominative_maybe_term">
  <!-- Candidate term headed by a nominative phrase.
       Head: an NP2 in the nominative, or one or more Unknown tokens.
       Tail: any number of genitive NP2, PP, Parenthetical or Unknown,
       tried in that order at each step. -->
  <seq>
   <!-- head -->
   <first>
    <ref name="NP2">
     <with-param name="case" value="':nom:'"/>
    </ref>
    <ref name="Unknown" mult="+"/>
   </first>
   <!-- tail -->
   <first mult="*">
    <ref name="NP2">
     <with-param name="case" value="':gen:'"/>
    </ref>
    <ref name="PP"/>
    <ref name="Parenthetical"/>
    <ref name="Unknown"/>
   </first>
  </seq>
 </rule>

 <rule name="instrumental_known_pseudo_term">
  <!-- Term-like phrase headed by an instrumental noun phrase.
       Head: an NP2_known in the instrumental, or one or more Unknown
       tokens.
       Tail: any number of genitive NP2, PP, Parenthetical or Unknown,
       tried in that order at each step. -->
  <seq>
   <!-- head -->
   <first>
    <ref name="NP2_known">
     <with-param name="case" value="':inst:'"/>
    </ref>
    <ref name="Unknown" mult="+"/>
   </first>
   <!-- tail -->
   <first mult="*">
    <ref name="NP2">
     <with-param name="case" value="':gen:'"/>
    </ref>
    <ref name="PP"/>
    <ref name="Parenthetical"/>
    <ref name="Unknown"/>
   </first>
  </seq>
 </rule>

 <rule name="nominative_maybe_term_no_parenthetical">
  <!-- Narrower variant of nominative_maybe_term.
       Head: an NP2 in the nominative, or exactly one Unknown token.
       Tail: any number of genitive NP2 or PP; Parenthetical and Unknown
       are not allowed in the tail. -->
  <seq>
   <!-- head -->
   <first>
    <ref name="NP2">
     <with-param name="case" value="':nom:'"/>
    </ref>
    <ref name="Unknown"/>
   </first>
   <!-- tail -->
   <first mult="*">
    <ref name="NP2">
     <with-param name="case" value="':gen:'"/>
    </ref>
    <ref name="PP"/>
   </first>
  </seq>
 </rule>


 <!-- 
 **************************************************************
 Top-level rules that express patterns met in manual annotation
 **************************************************************
 -->
 
 <rule name="za_NP_uwaza_sie_definiens" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: "za" + term + "uważa się" / "przyjmuje się" + rest of the
       sentence up to a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <query match="tok[. = 'za']"/>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <ref name="Uwazac_sie"/>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>
 
 <rule name="NP_to_jest_definiens" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + "to jest" / "tj." (rule Tj) + rest of the sentence
       up to a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <ref name="Tj"/>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>
 
 <rule name="NP_jest_definiens-inst" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + form of "być" (rule jest-sa) + instrumental phrase
       (rule instrumental_known_pseudo_term) + rest of the sentence up to
       a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid.
       An earlier, disabled version used a plain NP1 with case ':inst:'
       in place of instrumental_known_pseudo_term. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <ref name="jest-sa"/>
   <ref name="instrumental_known_pseudo_term"/>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>
 
 <!--  <rule name="Definiens-inst_jest_NP" wrap="definingText" attrs="def='$defid'"> -->
 <!--   <seq> -->
 <!--    <ref name="instrumental_known_pseudo_term"/> -->
 <!--    <ref name="jest-sa"/> -->
 <!--    <ref name="nominative_maybe_term" wrap="markedTerm" attrs="id='$defid'"/> -->
 <!--   </seq> -->
 <!--  </rule> -->

 <rule name="NP_colon_definiens-nom" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: nominative term + separator + optional copula + nominative
       NP1 (or a Noun_possibly_nom) + rest of the sentence up to a full
       stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="nominative_maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <first>
    <!-- usual separators: colon, hyphen or en dash -->
    <query match="tok[. ~ '^(:|-|–)$']"/>
    <!-- special case for All_you_need_to_know_about_webbrowsers_kw.xml
         (reported to have no effect at the moment): a one-character
         token tagged ign, intended to be an apostrophe, followed by the
         two tokens below -->
    <seq>
     <query match="tok[. ~ '^.$' and @ctag = 'ign']"/>
     <query match="tok[. = '€']"/>
     <query match="tok[. = '“']"/>
    </seq>
   </first>
   <ref name="copula" mult="?"/>
   <first>
    <ref name="NP1">
     <with-param name="case" value="':nom:'"/>
    </ref>
    <ref name="Noun_possibly_nom"/>
   </first>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>

 <!-- Catches sequences like "Precyzja [jest] to stosunek liczby a do liczby b"
 -->
 <rule name="NP_copula_definiens-nom" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: nominative term + copula + any number of Adv or PP +
       an NP1 in any case (or a token with lemma "taki") + rest of the
       sentence up to a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="nominative_maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <ref name="copula"/>
   <!-- optional modifiers between the copula and the defining phrase -->
   <first mult="*">
    <ref name="Adv"/>
    <ref name="PP"/>
   </first>
   <!-- start of the defining phrase -->
   <first>
    <ref name="NP1">
     <with-param name="case" value="''"/>
    </ref>
    <query match="tok[@base = 'taki']"/>
   </first>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>
 
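 <!-- Like NP_copula_definiens-nom but with no term before the copula: the match must start at a tok
 that is the first tok child of its parent (tok[1]), e.g. "[Jest] to stosunek liczby a do liczby b" -->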
 <rule name="S_Beg_copula_definiens-nom" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern with no term of its own: copula + an NP1 in any case (or a
       token with lemma "taki") + rest of the sentence up to a full stop.
       The "and" also requires the starting token to satisfy tok[1], i.e.
       to be the first tok child of its parent (presumably the first
       token of the sentence; confirm against the input format).
       The whole match is wrapped as definingText with def=$defid; no
       markedTerm is produced. -->
  <and>
   <query match="tok[1]"/>
   <seq>
    <ref name="copula"/>
    <first>
     <ref name="NP1">
      <with-param name="case" value="''"/>
     </ref>
     <query match="tok[@base = 'taki']"/>
    </first>
    <ref name="run_to_full_stop"/>
   </seq>
  </and>
 </rule>
 
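 <!-- Catches sequences like "KlikoHIT określa wartość strony ...".
 The definiendum should be at the beginning of the sentence to at least partially
 avoid catching typical NP-VP sentences.
 NOTE(review): the rule below does not itself test for sentence-initial position
 (compare the tok[1] test in S_Beg_copula_definiens-nom); confirm whether this is
 enforced elsewhere.
 This rule may cause more harm than good, by killing precision.
 -->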
 <rule name="NP_definiens-VP-3pers" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + any number of Adv + defining verb + rest of the
       sentence up to a full stop (rule Verb-until-full-stop).
       No test on the position of the term within the sentence is made
       here.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <ref name="Adv" mult="*"/>
   <ref name="Verb-until-full-stop"/>
  </seq>
 </rule>
 
 <rule name="NP_czyli_definiens-anyquestion" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern for questions: term + separator token (hyphen, en dash,
       "Ó" or comma) + "czyli" ("that is") + everything up to a "?".
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <query match="tok[. ~ '^(-|–|Ó|,)$']"/>
   <query match="tok[. = 'czyli']"/>
   <ref name="anything-until-questionmark"/>
  </seq>
 </rule>
 
 <rule name="NP_czyli_definiens-nom" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + separator token (hyphen, en dash, "Ó" or comma) +
       "czyli" or a Tj phrase + an NP1 in any case + rest of the sentence
       up to a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <query match="tok[. ~ '^(-|–|Ó|,)$']"/>
   <first>
    <query match="tok[. = 'czyli']"/>
    <ref name="Tj"/>
   </first>
   <ref name="NP1">
    <with-param name="case" value="''"/>
   </ref>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>

 <rule name="NP_definiens-in-brackets" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + "(" + copula, Tj phrase or "czyli" + everything up
       to ")" + the ")" token itself.
       The term uses nominative_maybe_term_no_parenthetical, so the
       bracketed part cannot be swallowed by the term.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="nominative_maybe_term_no_parenthetical" wrap="markedTerm" attrs="id='$defid'"/>
   <query match="tok[. = '(']"/>
   <first>
    <ref name="copula"/>
    <ref name="Tj"/>
    <query match="tok[. = 'czyli']"/>
   </first>
   <ref name="anything-until-rbracket"/>
   <query match="tok[. = ')']"/>
  </seq>
 </rule>
 
 <!--  <rule name="acronym" wrap="definingText" attrs="def='$defid'"> -->
 <!--   <seq> -->
 <!--    <query match="tok[. ~ '^[A-Z]+$']" wrap="markedTerm" attrs="id='$defid'"/> -->
 <!--    <query match="tok[. = '(']"/> -->
 <!--    <ref name="anything-until-rbracket"/> -->
 <!--    <query match="tok[. = ')']"/> -->
 <!--    <ref name="run_to_full_stop"/>    -->
 <!--   </seq> -->
 <!--  </rule> -->
 
 <rule name="NP_mozna_inf_jako_definiens-acc" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + modal construction + "jako" ("as") + NP1 + rest of
       the sentence up to a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <first>
    <!-- "można" + adverbs + infinitive -->
    <seq>
     <query match="tok[. = 'można']"/>
     <ref name="Adv" mult="*"/>
     <ref name="Infinitive"/>
    </seq>
    <!-- "może" + adverbs + infinitive + a ppas token -->
    <seq>
     <query match="tok[. = 'może']"/>
     <ref name="Adv" mult="*"/>
     <ref name="Infinitive"/>
     <query match="tok[@ctag='ppas']"/>
    </seq>
   </first>
   <query match="tok[. = 'jako']"/>
   <first>
    <!-- expected case after "jako" here -->
    <ref name="NP1">
     <with-param name="case" value="':acc:'"/>
    </ref>
    <!-- fallback: the phrase should be accusative but is often tagged
         as nominative -->
    <ref name="NP1">
     <with-param name="case" value="':nom:'"/>
    </ref>
   </first>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>
 
 <rule name="NP_to_NP_ktory" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: term + copula + optional token with lemma "taki" + NP2 in
       any case + optional comma + token with lemma "który" ("which") +
       rest of the sentence up to a full stop.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <ref name="copula"/>
   <query mult="?" match="tok[@base = 'taki']"/>
   <ref name="NP2">
    <with-param name="case" value="''"/>
   </ref>
   <query mult="?" match="tok[. = ',']"/>
   <query match="tok[@base = 'który']"/>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>

 <rule name="termin_NP_odnosi_sie" wrap="definingText" attrs="def='$defid'">
  <!-- Pattern: "termin" or "Termin" ("term") + term + optional token with
       lemma "odnosić" ("refer") + rest of the sentence up to a full stop.
       Because the verb is optional, any "termin" + term sequence that
       runs to a full stop matches.
       The term is wrapped as markedTerm with id=$defid; the whole match
       is wrapped as definingText with def=$defid. -->
  <seq>
   <query match="tok[. ~ '^[tT]ermin$']"/>
   <ref name="maybe_term" wrap="markedTerm" attrs="id='$defid'"/>
   <query mult="?" match="tok[@base = 'odnosić']"/>
   <ref name="run_to_full_stop"/>
  </seq>
 </rule>
 

 <rule name="main">
  <!-- Entry point (see apply="main" on the root element).
       The top-level patterns are tried in the order listed and the first
       one that matches is used, so the order is significant.
       Each pattern receives its own name as $defid, which ends up in the
       def and id attributes of the elements it produces. -->
  <first>
   <ref name="za_NP_uwaza_sie_definiens">
    <with-param name="defid" value="'za_NP_uwaza_sie_definiens'"/>
   </ref>
   <!-- disabled: acronym, with defid 'acronym' -->
   <ref name="NP_definiens-in-brackets">
    <with-param name="defid" value="'NP_definiens-in-brackets'"/>
   </ref>
   <ref name="NP_to_jest_definiens">
    <with-param name="defid" value="'NP_to_jest_definiens'"/>
   </ref>
   <ref name="NP_jest_definiens-inst">
    <with-param name="defid" value="'NP_jest_definiens-inst'"/>
   </ref>
   <!-- disabled: Definiens-inst_jest_NP, with defid 'Definiens-inst_jest_NP' -->
   <ref name="NP_colon_definiens-nom">
    <with-param name="defid" value="'NP_colon_definiens-nom'"/>
   </ref>
   <ref name="NP_copula_definiens-nom">
    <with-param name="defid" value="'NP_copula_definiens-nom'"/>
   </ref>
   <ref name="S_Beg_copula_definiens-nom">
    <with-param name="defid" value="'S_Beg_copula_definiens-nom'"/>
   </ref>
   <ref name="NP_czyli_definiens-anyquestion">
    <with-param name="defid" value="'NP_czyli_definiens-anyquestion'"/>
   </ref>
   <ref name="NP_czyli_definiens-nom">
    <with-param name="defid" value="'NP_czyli_definiens-nom'"/>
   </ref>
   <ref name="NP_mozna_inf_jako_definiens-acc">
    <with-param name="defid" value="'NP_mozna_inf_jako_definiens-acc'"/>
   </ref>
   <ref name="NP_definiens-VP-3pers">
    <with-param name="defid" value="'NP_definiens-VP-3pers'"/>
   </ref>
   <ref name="NP_to_NP_ktory">
    <with-param name="defid" value="'NP_to_NP_ktory'"/>
   </ref>
   <ref name="termin_NP_odnosi_sie">
    <with-param name="defid" value="'termin_NP_odnosi_sie'"/>
   </ref>
  </first>
 </rule>

</rules>

