[ferlanero@ferlanero-imac okb-engine-master]$ db/build.sh es Building for languages: es ~/okb-engine-master/ngrams ~/okb-engine-master/db running build running build_ext running build running build_ext ~/okb-engine-master/db ~/okb-engine-master/cluster ~/okb-engine-master/db make: No se hace nada para 'first'. ~/okb-engine-master/db «/home/ferlanero/okb-engine-master/db/lang-en.cf» -> «/home/ferlanero/okboard/langs/lang-en.cf» «/home/ferlanero/okb-engine-master/db/lang-es.cf» -> «/home/ferlanero/okboard/langs/lang-es.cf» «/home/ferlanero/okb-engine-master/db/lang-fr.cf» -> «/home/ferlanero/okboard/langs/lang-fr.cf» «/home/ferlanero/okb-engine-master/db/lang-nl.cf» -> «/home/ferlanero/okboard/langs/lang-nl.cf» «/home/ferlanero/okb-engine-master/db/add-words-fr.txt» -> «/home/ferlanero/okboard/langs/add-words-fr.txt» «/home/ferlanero/okb-engine-master/db/db.version» -> «/home/ferlanero/okboard/langs/db.version» make: '.depend-es' está actualizado. ( [ -f "add-words-es.txt" ] && cat "add-words-es.txt" ; aspell -l es dump master ) | sort | uniq > es-full.dict lbzip2 -d < /home/ferlanero/okboard/langs/corpus-es.txt.bz2 | /home/ferlanero/okb-engine-master/db/../tools/corpus-splitter.pl 200 50 es-learn.tmp.bz2 es-test.tmp.bz2 mv -vf es-learn.tmp.bz2 es-learn.txt.bz2 «es-learn.tmp.bz2» -> «es-learn.txt.bz2» mv -vf es-test.tmp.bz2 es-test.txt.bz2 «es-test.tmp.bz2» -> «es-test.txt.bz2» set -o pipefail ; lbzip2 -d < es-learn.txt.bz2 | /home/ferlanero/okb-engine-master/db/../tools/import_corpus.py es-full.dict | sort -rn | lbzip2 -9 > grams-es-full.csv.bz2.tmp mv -f grams-es-full.csv.bz2.tmp grams-es-full.csv.bz2 set -o pipefail ; lbzip2 -d < grams-es-full.csv.bz2 | grep ';#NA;#NA;' | cut -f '1,4' -d';' \ | grep -v '#TOTAL' | sort -rn | cut -d';' -f 2 | egrep -v '^(i)$' | tee words-es.txt \ | sed -n "1,30000 p" > es-predict.dict.tmp # ok i've re-implemented "head" with sed to avoid ugly sigpipes (which hurt with -o pipefail) mv -f es-predict.dict.tmp es-predict.dict set -o pipefail ; lbzip2 -d < es-learn.txt.bz2 | /home/ferlanero/okb-engine-master/db/../tools/import_corpus.py es-predict.dict | lbzip2 -9 > grams-es-learn.csv.bz2.tmp /home/ferlanero/okb-engine-master/db/../tools/loadkb.py es-full.tre < es-full.dict set -o pipefail ; lbzip2 -d < es-test.txt.bz2 | /home/ferlanero/okb-engine-master/db/../tools/import_corpus.py es-predict.dict | lbzip2 -9 > grams-es-test.csv.bz2.tmp mv -f grams-es-learn.csv.bz2.tmp grams-es-learn.csv.bz2 Computing clusters for language es. Please make some coffee ... (logs can be found in clusters-es.log) set -o pipefail ; lbzip2 -d < grams-es-learn.csv.bz2 | sort -rn | sed -n "1,13500000 p" \ | /home/ferlanero/okb-engine-master/db/../tools/cluster -n 10 -o clusters-es.tmp > clusters-es.log 2>&1 mv -f clusters-es.tmp clusters-es.txt mv -f grams-es-test.csv.bz2.tmp grams-es-test.csv.bz2 1000 set -o pipefail ; lbzip2 -d < grams-es-learn.csv.bz2 \ | /home/ferlanero/okb-engine-master/db/../tools/clusterize.py -l 8 -w 200000 -c 500000 clusters-es.txt \ | tee predict-es.txt \ | /home/ferlanero/okb-engine-master/db/../tools/load_cdb_fslm.py predict-es-tmp.db Import CSV corpus data ... Dumping compressed ngram file ... Dumping words to database ... 2000 lbzip2 -9fv predict-es.txt lbzip2: compressing "predict-es.txt" to "predict-es.txt.bz2" lbzip2: "predict-es.txt": compression ratio is 1:2.274, space savings is 56.02% /home/ferlanero/okb-engine-master/db/../tools/db_param.py predict-es-tmp.db version 11 lbzip2 -9f predict-es-tmp.rpt mv -f predict-es-tmp.db predict-es.db mv -f predict-es-tmp.ng predict-es.ng mv -f predict-es-tmp.rpt.bz2 predict-es.rpt.bz2 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000 31000 32000 33000 34000 35000 36000 37000 38000 39000 40000 41000 42000 43000 44000 45000 46000 47000 48000 49000 50000 51000 52000 53000 54000 55000 56000 /home/ferlanero/okb-engine-master/db/../tools/loadkb.py es.tre < words-es.txt # all word seens in learn corpus (smaller than full directory, but bigger than prediction learning dictionary) OK es sending incremental file list es-full.tre es.tre predict-es.db predict-es.ng predict-es.rpt.bz2 sent 2,423,995 bytes received 111 bytes 4,848,212.00 bytes/sec total size is 2,423,052 speedup is 1.00