www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

dump2sqlite.sh (4542B)


      1 #!/bin/bash
      2 
      3 # TODO : sed -E sur certaines machines, sed -r sur d'autres.
      4 
      5 user="foo"
      6 passwd="bar"
      7 
      8 if ! [ -e "$1" ]; then
      9 	echo "Le fichier $1 n'existe pas !"
     10 	exit 1
     11 fi
     12 
     13 echo "  dump2sql.sh : conversion des dumps de JeuxDeMots vers du sql (sqlite3)." >&2
     14 echo "  La progression est affichée avec pv. Si vous n'avez pas pv, supprimez la ligne correspondante dans ce script." >&2
     15 echo "  Et c'est parti !" >&2
     16 echo >&2
     17 
     18 # Played_game(type) : 0 => partie de référence, 1 => joueur
     19 # Note : l'index i_played_game_all sert à la vérification lors du set_partie.
     20 # Note : le echo | dd | md5 permet de ne pas avoir le \n, y compris sur les versions de sh sous mac boguées qui ne supportent pas «echo -n»
     21 # Valeurs pour le champ group dans user : 1 = player, 2 = admin
     22 
     23 cat <<EOF
     24 begin transaction;
     25 create table node(eid integer primary key autoincrement, name, type, weight);
     26 create table relation(rid integer primary key autoincrement, start, end, type, weight);
     27 create table type_node(name, num);
     28 create table type_relation(name, num, extended_name, info);
     29 create table user(login primary key, mail, hash_passwd, score, ugroup, cgCount);
     30 create table game(gid integer primary key autoincrement, eid_central_word, relation_1, relation_2, difficulty, author, nb_like, nb_dislike);
     31 create table game_cloud(gid, num, difficulty, eid_word, totalWeight, probaR1, probaR2, probaR0, probaTrash);
     32 create table played_game(pgid, gid, login, timestamp, like, primary key(login, pgid));
     33 create table played_game_sequence(id integer primary key autoincrement);
     34 create table played_game_cloud(pgid, login, gid, type, num, relation, weight, score);
     35 create table colon_nodes(eid);
     36 create table random_cloud_node(eid,nbneighbors);
     37 create table random_center_node(eid);
     38 create table user_info(user, key, value, primary key (user, key));
     39 
     40 insert into user(login, mail, hash_passwd, score, ugroup, cgCount) values('$(echo "$user" | sed -e "s/'/''/g")', 'foo@isp.com', '$(echo "$passwd" | 
     41 dd bs=1 count="${#passwd}" | (if which md5sum >/dev/null 2>&1; then md5sum; else md5; fi) | cut -d ' ' -f 1)', 0, 1, 0);
     42 insert into user(login, mail, hash_passwd, score, ugroup, cgCount) values('bot', 'no mail', 'no password', 0, 1, 0);
     43 EOF
     44 
     45 # tr : pour virer le CRLF qui traîne
     46 # Le gros tas de sed / tr : pour virer le newline dans une des description étendue
     47 cat "$1" \
     48 | iconv -f iso-8859-1 -t utf-8 \
     49 | tr '\r' ' ' \
     50 | sed -e 's/X/XX/g' | sed -e 's/A/Xa/g' | tr '\n' 'A' | sed -e 's/A")/")/g' | tr 'A' '\n' | sed -e 's/Xa/A/g' | sed -e 's/XX/X/g' \
     51 | pv -s "$(wc -c "$1" | sed -E -e 's/^ *([0-9]*) .*$/\1/')" \
     52 | sed -E \
     53   -e 's#\\##g' \
     54   -e "s#'#''#g" \
     55   -e 's#^/?// [0-9]+ occurrences of relations ([a-z_]+) \(t=([0-9]+) nom_etendu="([^"]+)" info="([^"]+)"\)$#insert into type_relation(name, num, extended_name, info) values('\''\1'\'', \2, '\''\3'\'', '\''\4'\'');#' \
     56   -e 's#^/?// [0-9]+ occurrences of nodes ([a-z_]+) \(t=([0-9]+)\)$#insert into type_node(name, num) values('\''\1'\'', \2);#' \
     57   -e 's#^eid=([0-9]+):n="(.*)":t=([0-9]+):w=(-?[0-9]+)$#insert into node(eid, name, type, weight) values(\1, '\''\2'\'', \3, \4);#' \
     58   -e 's#^rid=([0-9]+):n1=([0-9]+):n2=([0-9]+):t=([0-9]+):w=(-?[0-9]+)#insert into relation(rid, start, end, type, weight) values(\1,\2,\3,\4,\5);#' \
     59 | grep -v '^//' \
     60 | grep -v '^$'
     61 
     62 cat <<EOF
     63 create index i_relation_start on relation(start);
     64 create index i_relation_end on relation(end);
     65 create index i_relation_type on relation(type);
     66 create index i_relation_start_type on relation(start,type);
     67 create index i_relation_end_type on relation(end,type);
     68 create index i_played_game_all on played_game(pgid, gid, login, timestamp);
     69 create index i_colon_nodes_eid on colon_nodes(eid);
     70 
     71 insert into colon_nodes(eid) select eid from node where name glob '::*';
     72 
     73 insert into random_cloud_node(eid,nbneighbors) select eid,sum(nb) from (
     74 	select (select type from node where node.eid = relation.start) as type,
     75 		start as eid,
     76 		count(start) as nb from relation
     77 		where type not in (4, 12, 36, 18, 29, 45, 46, 47, 48, 1000, 1001) and start not in colon_nodes
     78 		group by start
     79 	union
     80 	select (select type from node where node.eid = relation.start) as type,
     81 		end as eid,
     82 		count(end) as nb from relation
     83 		where type not in (4, 12, 36, 18, 29, 45, 46, 47, 48, 1000, 1001) and start not in colon_nodes
     84 		group by end
     85 ) where type = 1 group by eid;
     86 create index i_random_cloud_node_nbneighbors on random_cloud_node(nbneighbors);
     87 insert into random_center_node(eid) select eid from random_cloud_node where nbneighbors > 6;
     88 commit;
     89 EOF