package main;

# Written 2007 Chad Redman
# Free for any use
#
# Reads a UTF-8 text file of source references and stores each reference in
# the "reference" column of the "files" table of an SQLite database.
#
# Input layout, as recognised by the parser below:
#   * a line consisting solely of an id matching [A-R]\d\d starts a new entry;
#   * the non-empty lines that follow are the text of that entry;
#   * a line starting with "Category " ends the current entry;
#   * "(Note: Some texts may be shortened)" lines and lines without any word
#     character are ignored between entries.

use strict;
use warnings;

use DBI;

my $INFILE = 'G:/Home/Chinese/corpora/LCMC/sources-extracted-mod1.txt.U8';
#my $DBFILE = 'G:/Home/Chinese/devel/corpus/lcmc_pinyin.db3';
my $DBFILE = 'G:/Home/Chinese/devel/corpus/lcmc';

my $sources = read_sources($INFILE);

# RaiseError makes DBI calls die on failure, so the eval/rollback logic below
# actually gets a chance to run.
my $dbh = DBI->connect(
    "dbi:SQLite:dbname=$DBFILE", "", "",
    { RaiseError => 1, PrintError => 0 },
) or die "Cannot connect to '$DBFILE': $DBI::errstr";

# NOTE(review): 'unicode' is the attribute name the script has always used;
# newer DBD::SQLite releases document it as 'sqlite_unicode' -- confirm which
# name the installed driver expects.
$dbh->{unicode}    = 1;
$dbh->{AutoCommit} = 0;    # all updates form a single transaction

foreach my $file (sort keys %$sources) {
    my $ok = eval {
        write_object($dbh, $file, $sources->{$file});
        1;
    };
    if (!$ok) {
        print "Transaction aborted because $@";
        eval { $dbh->rollback };
        eval { $dbh->disconnect };
        exit 1;
    }
}

$dbh->commit;    # commit the changes if we get this far
$dbh->disconnect;

exit 0;

# read_sources($path)
#
# Parses the reference file at $path and returns a hash reference mapping
# each entry id (e.g. "A01") to its text; the lines of an entry are joined
# with a single "\n" and carry no trailing newline.
# Unexpected lines found between entries are reported on STDOUT and skipped.
# Dies if the file cannot be opened.
sub read_sources {
    my ($path) = @_;

    open(my $in, '<:encoding(UTF-8)', $path)
        or die "Cannot open '$path': $!";

    my %sources;
    my $state      = 'start';    # 'start': between entries, 'text': inside one
    my $cur_file   = '???';
    my $cur_source = '';

    while (my $line = <$in>) {
        chomp $line;

        if ($state eq 'start') {
            next unless $line =~ /\w/;
            next if $line =~ /^Category /;
            next if $line =~ /^\(Note: Some texts may be shortened\)$/;

            if ($line =~ /^([A-R]\d\d)$/) {
                $cur_file = $1;
                $state    = 'text';
            }
            else {
                print "Warning: unexpected line '$line' on line $.\n";
            }
        }
        elsif ($state eq 'text') {
            if ($line =~ /^([A-R]\d\d)$/) {
                # ending one object and starting another
                $sources{$cur_file} = $cur_source;
                $cur_file           = $1;
                $cur_source         = '';
            }
            elsif ($line =~ /^Category /) {
                # terminates an object, but new one not found yet
                $sources{$cur_file} = $cur_source;
                $cur_file           = '???';
                $cur_source         = '';
                $state              = 'start';
            }
            elsif (length $line) {
                # empty lines inside an entry are dropped
                $cur_source .= "\n" if length $cur_source;
                $cur_source .= $line;
            }
        }
    }

    # The last entry has no terminator line; flush it here.
    if (length $cur_source) {
        $sources{$cur_file} = $cur_source;
    }

    close($in);

    return \%sources;
}

# write_object($dbh, $id, $val)
#
# Sets files.reference to $val for the row whose id is $id.
# Dies if the statement cannot be prepared or executed, so that the caller
# can roll the transaction back. An update that touches a number of rows
# other than one (e.g. unknown id) is only reported on STDOUT.
sub write_object {
    my ($dbh, $id, $val) = @_;

    my $sth = $dbh->prepare_cached("UPDATE files SET reference = ? WHERE id = ?");
    defined $sth
        or die "Cannot prepare update for $id: " . _errstr($dbh) . "\n";

    # execute() returns undef on error and "0E0" (true, numerically zero)
    # when no row matched.
    my $rows_affected = $sth->execute($val, $id);
    defined $rows_affected
        or die "Cannot update $id: " . _errstr($dbh) . "\n";

    if ($rows_affected != 1) {
        print "*Error: $id: $rows_affected, ", _errstr($dbh), "\n";
    }

    return;
}

# Returns the handle's last error message, or '' when there is none.
sub _errstr {
    my ($dbh) = @_;
    my $err = $dbh->errstr;
    return defined $err ? $err : '';
}