#!/bin/bash
#
# Step 1: download the HAL (hal.inria.fr) advanced-search result pages for
# the author "olivier teytaud" into fichier<N>.html in the current directory,
# then print the opening lines of the listing on stdout.
#
# NOTE(review): this part of the script had been collapsed onto a few very
# long physical lines, so everything after "#!/bin/bash" was swallowed by the
# shebang line and never executed. It is re-split here, one command per line.
# NOTE(review): the echo literals look as if HTML markup was stripped from
# them at some point; they are reproduced as found -- confirm the intended
# markup before relying on the generated output.

# Start from a clean directory. -f keeps a first run (no *.html yet) quiet.
rm -f -- *.html

hal_index='http://hal.inria.fr/index.php'

# Search criteria shared by every request (ordering, author filters and the
# list of document types to search in).
hal_criteria='orderby=DATEPROD&ascdesc=DESC'
hal_criteria+='&p_0=contained&v_0=olivier&f_0=FULLNAME&l_0=and'
hal_criteria+='&p_1=is_exactly&v_1=teytaud&f_1=LASTNAME&l_1=and'
hal_criteria+='&p_2=is_exactly&f_2=LAB&l_2=and'
hal_criteria+='&p_3=contained&f_3=ORGANISME'
hal_doc_types=(
  ART_ACL ART_SCL COMM_ACT COMM_SACT CONF_INV OUVS COVS DOUV
  REPORT PATENT OTHER THESE HDR COURS UNDEFINED
)
for hal_n in "${!hal_doc_types[@]}"; do
  hal_criteria+="&search_in_typdoc[${hal_n}]=${hal_doc_types[hal_n]}"
done

# First result page.
lynx -dump -source \
  "${hal_index}?action_todo=search&s_type=advanced&langue=fr&submit=1&${hal_criteria}" \
  > fichier1.html

# Follow-up pages, requested with begin_at=10, 20, ..., 200.
# NOTE(review): halsid is a hard-coded session id -- presumably captured from
# a browser session; verify that the server still accepts it.
# (The original also carried commented-out "konqueror <url>" lines for opening
# the same URLs in a browser while debugging.)
for ((i = 10; i <= 200; i += 10)); do
  lynx -dump -source \
    "${hal_index}?s_type=advanced&submit=1&halsid=se31fttr7fba23uoifd6fh49p1&${hal_criteria}&begin_at=${i}" \
    > "fichier${i}.html"
done

# Opening lines of the listing on stdout: two blank lines, then one more.
printf '\n\n'
echo ""
# Step 2: for every publication URL found in the *.html files of the current
# directory, print an index entry on stdout and write a detail page
# publi<N>.html; then delete the fichier*.html pages and upload what is left.
#
# NOTE(review): the echo literals below look as if HTML markup was stripped
# from them at some point; they are reproduced as found -- confirm the
# intended markup.

# Each helper takes the HTML source of a publication page as $1 and prints
# the requested field(s), one matching line per output line.

# Title: text following 'meta_title">' with the closing </td> removed.
pub_title() {
  grep meta_title <<<"$1" | sed -e 's/.*meta_title">//g' -e 's/<\/td>//g'
}

# Date: the content="..." value on lines mentioning DC.date.
pub_date() {
  grep DC.date <<<"$1" | sed -e 's/.*content="//g' -e 's/".*//g'
}

# Authors: the content="..." values on lines mentioning DC.creator, joined
# with single spaces and without a trailing newline.
# NOTE(review): the original sed replacement held a raw newline (an
# unterminated "s" command, so sed failed and nothing was printed); a
# separator tag was presumably lost there -- confirm the wanted separator.
pub_authors() {
  local names
  names=$(grep DC.creator <<<"$1" | sed -e 's/.*content="//g' -e 's/".*//g')
  printf '%s' "${names//$'\n'/ }"
}

# Abstract: text following 'meta_abstract">' up to the closing </td.
pub_abstract() {
  grep meta_abstract <<<"$1" | sed -e 's/.*meta_abstract">//g' -e 's/<\/td.*//g'
}

# Collect the publication URLs: rewrite everything up to "inria-" into the
# site prefix and cut at the first "&". awk drops repeated URLs while keeping
# first-seen order (plain uniq only removes adjacent repeats).
mapfile -t pub_urls < <(
  grep "inria-" *.html \
    | sed 's/.*inria\-/http:\/\/hal.inria.fr\/inria-/g' \
    | sed 's/\&.*//g' \
    | awk '!seen[$0]++'
)

idx=0
for html in "${pub_urls[@]}"; do
  idx=$((idx + 1))
  detail="publi${idx}.html"

  # Progress report on stderr (the original "1>2" wrote to a file named "2").
  echo "case ${idx}" >&2

  # Fetch the publication page once and reuse it for every field.
  page=$(lynx -dump -source "$html")

  # Detail page for this publication.
  {
    echo ' '
    pub_title "$page"
    echo " "
    pub_date "$page"
    echo " "
    echo " PDF file "
    echo ""
    printf '\n\n'
    pub_authors "$page"
    printf '\n\n'
    echo "Abstract:"
    pub_abstract "$page"
  } > "$detail"

  # Index entry on stdout.
  echo -n ""   # prints nothing as written; kept as a placeholder (see NOTE)
  echo -n " Publi ${idx}. "
  pub_title "$page"
  printf '\n\n'
  pub_date "$page"
  printf '\n\n'
  pub_authors "$page"
  printf '\n\n'
  echo "Abstract:"
  pub_abstract "$page"
  printf '\n\n'
  printf ' PDF file\n\n'
  echo " Bibtex (click on \"Export this paper\")"
done
echo ""

# Drop the downloaded search-result pages, then publish the remaining pages.
rm -f -- fichier*.html
scp ./*.html acces.lri.fr:WWW/