#!/usr/bin/env ruby
 #
 # This file is part of the Anotatornia suite.
 # 
 # Copyright © 2007, 2008, 2009, 2010 by Instytut Podstaw Informatyki
 # Polskiej Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 # Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 # 
 # This file may be distributed and/or modified under the terms of the
 # GNU General Public License version 3 as published by the Free Software
 # Foundation and appearing in the file COPYING included in the packaging
 # of this file.  (See http://www.gnu.org/licenses/translations.html for
 # unofficial translations.)
 # 
 # A commercial license is available from IPI PAN (contact
 # Michal.Ciesiolka.waw.pl or ipi.waw.pl for more
 # information).  Licensees holding a valid commercial license from IPI
 # PAN may use this file in accordance with that license.
 # 
 # This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 # THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 # PURPOSE.
 #

#
# written 2007/08/08 and 2007/09/12 by natror
# rewritten for NKJP on 2009/01/14 by natror.

require 'date'
require 'ftools' # for File.copy etc.

require 'jcode' # doesn't redefine String functions but defines its own
$KCODE="u" # or "UTF8" to be verbose

require 'find'

require 'ramkowanie'


  # Name and directory of this script.
  # File.basename is used instead of matching on a '/' so that $myname is
  # also set when the script is started without any directory component
  # (e.g. `ruby script.rb`), where a slash-based regexp would leave it nil.
  $myname = File.basename( __FILE__ )
  $mydirname = File.dirname( __FILE__ )

## require "#{mydirname}/script/console"

# Hard-coded expiry: after this moment the script exits silently without
# touching anything.
# NOTE(review): String#to_time is not core Ruby; presumably it comes from the
# environment loaded through 'ramkowanie' -- confirm.
deadline = '2009/8/25 22:00'.to_time
exit if Time.now > deadline


# Kept in a variable in case there are ever more morph files.

$source_of_meanings = "nkjp_wsd_si/NKJP_WSI.xml"

# Refuse to wipe the old senses while any SensAnot record exists.
raise "@@@@ Próba usunięcia starych sensów gdy istnieje sens_anot!" if SensAnot.find( :first )

# Per the original note, the pragma helpers used by this script are defined
# by \file{ramkowanie.rb}.
ActiveRecord::Base.pragmas_for_update

puts "czyszczę tabele sensy i cz_m_leksem"
Sensy.delete_all
CzMLeksem.delete_all

db = ActiveRecord::Base.connection

puts "odkojarzam tabele leksem i interpretacja"
# Clear the sense assignments stored on lexemes and interpretations.
%w[leksem interpretacja].each do |table|
  db.execute "update #{table} set cz_m_leksem_id=null, cz_m_leksem_przypisany='f'"
end

db.execute "update statusy set word_senses=-1, word_senses_uzid=null"
# #{AkapitTranszy.update_all_Stati} was wrong here: it caused an empty
# approval of WSD for all paragraphs, which were therefore not processed
# after the dictionary had been loaded.

Sensy.reset_column_information

# Sanity check: the senses table must carry the cz_m_leksem_id column.
unless Sensy.column_names.include?( "cz_m_leksem_id" )
  raise "Sensy bez cz_m_leksem_id!!!"
end

# Begins a new dictionary entry: builds a fresh (unsaved here) CzMLeksem
# carrying the given xml:id and makes it the current one in $cz_m_leksem.
#
# xmlid:: value of the entry's xml:id attribute
# Returns the newly created CzMLeksem.
def nowe_entry( xmlid )
  leksem = CzMLeksem.new( :xmlid => xmlid )
  $cz_m_leksem = leksem
end # of #nowe_entry

# Stores the lemma on the entry currently being built ($cz_m_leksem).
#
# lemat:: lemma text taken from the <orth> element
def set_orth( lemat )
  leksem = $cz_m_leksem
  leksem.lemat = lemat
end

# Sets the part of speech of the entry currently being built ($cz_m_leksem).
#
# cz_m_ozn:: part-of-speech label taken from the <pos> element; the label
#            'subst' is looked up as 'noun'
#
# Prints the label being looked up. Raises RuntimeError when no CzM record
# matches it, instead of calling #id on nil (which would either blow up with
# an unrelated message or, on old Rubies, hand back a bogus object id).
def set_cz_m( cz_m_ozn )
  cz_m_ozn = 'noun'  if cz_m_ozn == 'subst'
  puts "@@@@ cz_m_ozn: »#{cz_m_ozn}«"
  cz_m = CzM.find_by_cz_m_ozn( cz_m_ozn )
  unless cz_m
    raise "unknown part of speech »#{cz_m_ozn}«: no matching CzM record"
  end
  $cz_m_leksem.cz_m_id = cz_m.id
end
  
# Starts a new sense of the current entry and makes it the current $sens.
#
# numerek:: sense number as a string (the n attribute)
# xmlid::   xml:id of the sense
#
# The xml:id must equal the current lemma, passed through String#odpolszcz,
# followed by '.' and either the sense number or 'INNE'; otherwise a
# RuntimeError is raised and $sens is left untouched.
# NOTE(review): #odpolszcz is defined outside this file; presumably it strips
# Polish diacritics -- confirm.
def nowy_sens( numerek, xmlid )
  # consistency check
  prefix = $cz_m_leksem.lemat.odpolszcz
  consistent = ( xmlid == prefix + '.' + numerek ) ||
               ( xmlid == prefix + '.INNE' )
  unless consistent
    raise "xmlid #{xmlid} niespójny z lematem #{prefix} i numerkiem #{numerek}"
  end

  sens = Sensy.new( :n => numerek.to_i,
                    :xmlid => xmlid,
                    :cz_m_id => $cz_m_leksem.cz_m_id )
  $sens = sens
  $sens.cz_m_leksem = $cz_m_leksem
end

# Stores the short definition on the sense currently being built ($sens).
#
# short_d:: text of the short definition
def set_def( short_d )
  sens = $sens
  sens.short_def = short_d
end

# Finishes the current sense: joins the lines collected in $long_def_xml
# with newlines, stores them as the sense's long_def_xml, saves the sense and
# resets the collector to a new empty array.
def output_long_def_xml
  xml_text = $long_def_xml.join( "\n" )
  $sens.long_def_xml = xml_text
  # per the original note, saving the sense also saves #{$cz_m_leksem}
  # if it has not been saved yet
  $sens.save!
  $long_def_xml = []
end

# Reports a mismatched closing tag by raising a RuntimeError.
#
# what_got::      the closing element that was encountered
# what_expected:: the element that should have been closed instead
def badclosure( what_got, what_expected )
  complaint = "I got </#{what_got}> while expecting </#{what_expected}>!"
  raise complaint
end

# Pops the innermost open element off the $we_are_in stack and verifies that
# it is the one being closed; on a mismatch delegates to #badclosure.
#
# what_got:: the element being closed (nil is used for "nothing open",
#            since popping an empty stack yields nil)
def check_closure( what_got )
  expected = $we_are_in.pop
  return if what_got == expected
  badclosure( what_got, expected )
end


# Main loop
#
# Reads $source_of_meanings line by line; this is a line-oriented scan, not
# a real XML parse.  Nothing is interpreted until a line containing <body>
# has been seen.  Inside the body, $we_are_in is a stack of the currently
# open elements (:entry, :form, :cz_m, :sense, :subsense, :def) and every
# closing tag is checked against its top.
#
# The order of the `when' clauses matters: the more specific patterns
# (top-level <sense ... xml:id=...>, one-line <def>...</def>) must come
# before the more general ones.
#
# Nested senses are not interpreted: their lines are collected verbatim in
# $long_def_xml and written out when the enclosing top-level sense closes.

$in_body = false
$we_are_in = []

$long_def_xml = []

File.open( $source_of_meanings ) { |som|
  
  som.each {|line| 
    line.chomp!
    ##    line.strip! if line

    if $in_body
    
      case line
      when /<entry xml:id="(.*)">/
        $we_are_in.push( :entry )
        nowe_entry( $1 )
      when /<form>/
        $we_are_in.push( :form )
      when /<orth>(.*)<\/orth>/
        set_orth( $1 )
      when /<\/form>/
        check_closure( :form )
      when/<gramGrp>/
        $we_are_in.push( :cz_m )
      when /<pos>(.*)<\/pos>/
        set_cz_m( $1 )
      when /<\/gramGrp>/
        check_closure( :cz_m )
      when /<sense n="(\d+)" xml:id="(.*)">/
        # a top-level sense: it has both a number and an xml:id
        $we_are_in.push( :sense )
        nowy_sens( $1, $2 )
      when /<def>(.*)<\/def>/
        # one-line definition: directly under a sense it is the short
        # definition, anywhere else the line is kept verbatim
        if $we_are_in[-1] == :sense
          set_def( $1 )
        else
          $long_def_xml << line
        end
      when /<def>(.*)/
        # a definition that continues on the following lines
        case $we_are_in[-1]
        when :sense
          $we_are_in.push( :def )
          $def = $1
        when :subsense
          $long_def_xml << line
        else
          raise "unhandled <def>'s placement!: in #{$we_are_in[-1]}"
        end
      when /(.*)<\/def>/
        # last line of a multi-line definition
        before_closing_tag = $1
        case  $we_are_in[-1]
        when :def
          # append only the text preceding </def>, so that the closing tag
          # itself does not end up in the short definition
          $def += ' ' + before_closing_tag
          set_def( $def )
          check_closure( :def )
        when :subsense
          $long_def_xml << line
        else
          raise "unhandled </def>'s placement: in #{$we_are_in[-1]}"
        end
      when /<sense n="(.*)">/
        # a nested sense (no xml:id): collected verbatim
        $we_are_in.push( :subsense )
        $long_def_xml << line
      when /<\/sense>/
        expected = $we_are_in.pop
        case expected 
        when :subsense
          $long_def_xml << line
        when :sense
          # the top-level sense is complete: store and save it
          output_long_def_xml
        else
          badclosure( "»/sense«", expected )
        end
      when /<\/entry>/
        check_closure( :entry )
      when /<\/body>/
        # nothing may be left open here: popping the empty stack gives nil
        check_closure( nil )
        $in_body = false

      else # of main #case
        case $we_are_in[-1]
        when :def
          # continuation line of a multi-line short definition
          $def += ' ' + line
        when :subsense
          $long_def_xml << line
        else
          # outside those two contexts only empty lines are tolerated
          unless line == ''
            raise "unhandled kind of line in #{$we_are_in[-1]}:\n »#{line}«"
          end
        end
      end# of main case
      
    else # not in <body>
      $in_body = true if line =~ /<body>/
    end # of in body or not
  } # of each line of the file,
} # Ruby closes the file.


# Post-processing after the dictionary has been loaded.  Each step announces
# itself with a progress message; the steps themselves are class methods
# defined outside this file.

# Message: "appending the translation of the full descriptions to html".
puts "dopisuję tłumaczenie pełnych opisów na html"
Sensy.long2html( :all )

# Message: "associating senses with the leksem and interpretacja tables
# (this will take a while)".
puts "kojarzę sensy z tabelami leksem i interpretacja (to potrwa jakiś czas)"
CzMLeksem.przypisz_leksemom_i_interpretacjom
# Message: "marking paragraphs for WSD annotation and auto-approving the
# vacuously satisfied ones".
puts "zaznaczam akapity do anotacji wsd i autozatwierdzam pustospełnione"
Akapit.autozatwierdź_word_senses # includes #{update_Status}.

# NOTE(review): presumably the counterpart of the pragmas_for_update call
# made before the tables were cleared -- confirm in ramkowanie.rb.
ActiveRecord::Base.pragmas_normal

# Final value of the script.
true

# about ranges: file:///usr/share/doc/rubybook/html/tut_stdtypes.html

# very instructive: /usr/lib/ruby/1.8/jcode.rb

### Local Variables:
### coding: utf-8
### End:
