用户:Wikibot:修订间差异
小 新页面: 机器人,自动将 http://help.ubuntu.com 和 http://wiki.ubuntu.com 由 MoinMoin 格式转换到 MediaWiki 格式,并自动更新和发布的小程序。 由 Java 写成。 |
小无编辑摘要 |
||
第1行: | 第1行: | ||
机器人,自动将 http://help.ubuntu.com 和 http://wiki.ubuntu.com 由 MoinMoin 格式转换到 MediaWiki 格式,并自动更新和发布的小程序。 | 机器人,自动将 http://help.ubuntu.com 和 http://wiki.ubuntu.com 由 MoinMoin 格式转换到 MediaWiki 格式,并自动更新和发布的小程序。 | ||
由 Java 写成。 | 由 Java 写成。 | ||
<nowiki> | |||
/* | |||
* Main.java | |||
* | |||
* Created on 2007年5月12日, 下午1:31 | |||
* | |||
* To change this template, choose Tools | Template Manager | |||
* and open the template in the editor. | |||
*/ | |||
package wiki; | |||
import java.io.BufferedInputStream; | |||
import java.io.BufferedReader; | |||
import java.io.DataOutputStream; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.FileOutputStream; | |||
import java.io.FileReader; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.io.InputStreamReader; | |||
import java.io.OutputStream; | |||
import java.io.OutputStreamWriter; | |||
import java.io.PrintWriter; | |||
import java.io.UnsupportedEncodingException; | |||
import java.net.HttpURLConnection; | |||
import java.net.MalformedURLException; | |||
import java.net.URL; | |||
import java.net.URLConnection; | |||
import java.net.URLEncoder; | |||
import java.security.GeneralSecurityException; | |||
import java.security.Security; | |||
import java.security.cert.X509Certificate; | |||
import java.util.HashMap; | |||
import java.util.Iterator; | |||
import java.util.List; | |||
import java.util.Map; | |||
import java.util.Vector; | |||
import java.util.regex.Matcher; | |||
import java.util.regex.Pattern; | |||
import javax.net.ssl.HostnameVerifier; | |||
import javax.net.ssl.HttpsURLConnection; | |||
import javax.net.ssl.SSLContext; | |||
import javax.net.ssl.SSLSession; | |||
import javax.net.ssl.X509TrustManager; | |||
/** | |||
* | |||
* @author oneleaf | |||
*/ | |||
public class Main { | |||
List <String> addDict= new Vector<String>(); | |||
List <String> oldDict= new Vector<String>(); | |||
String cookie=getCookie(); | |||
private String getCookie(){ | |||
String cookie = ""; | |||
try{ | |||
URL httpurl = new URL("http://wiki.ubuntu.org.cn/index.php?title=Special:Userlogin"); | |||
HttpURLConnection httpConn = (HttpURLConnection)httpurl.openConnection(); | |||
httpConn.addRequestProperty("Cookie",cookie); | |||
httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)"); | |||
httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); | |||
httpConn.setRequestProperty("Connection", "Keep-Alive"); | |||
httpConn.setUseCaches(false); | |||
cookie=httpConn.getHeaderField("Set-Cookie"); | |||
String data="wpName=wikibot&wpPassword=********&wpRemember=1"; | |||
httpurl = new URL("http://wiki.ubuntu.org.cn/index.php?title=Special:Userlogin&action=submitlogin&type=login"); | |||
httpConn = (HttpURLConnection)httpurl.openConnection(); | |||
httpConn.setRequestMethod("POST"); | |||
httpConn.addRequestProperty("Cookie",cookie); | |||
httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)"); | |||
httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); | |||
httpConn.setRequestProperty("Content-Language","UTF-8" ); | |||
httpConn.setRequestProperty("Content-Length", ""+data.getBytes().length); | |||
httpConn.setRequestProperty("Connection", "Keep-Alive"); | |||
httpConn.setDoOutput(true); | |||
httpConn.setDoInput(true); | |||
httpConn.setUseCaches(false); | |||
DataOutputStream outStream = new DataOutputStream(httpConn.getOutputStream()); | |||
outStream.writeBytes(data); | |||
outStream.flush(); | |||
outStream.close(); | |||
List <String> list=httpConn.getHeaderFields().get("Set-Cookie"); | |||
for (int i=0;i<list.size();i++){ | |||
cookie=cookie+"; "+list.get(i); | |||
} | |||
// Iterator iter=httpConn.getHeaderFields().keySet().iterator(); | |||
// while (iter.hasNext()){ | |||
// String key=(String) iter.next(); | |||
// List list=httpConn.getHeaderFields().get(key); | |||
// System.out.print(key+": "); | |||
// for (int i=0;i<list.size();i++){ | |||
// System.out.print(list.get(i)); | |||
// } | |||
// System.out.print("\n"); | |||
// } | |||
// | |||
// cookie=httpConn.getHeaderField("Set-Cookie"); | |||
// System.out.println("Cookie_2:"+cookie); | |||
// BufferedReader in = new BufferedReader(new InputStreamReader(httpConn.getInputStream())); | |||
// String line; | |||
// while ((line = in.readLine())!= null){ | |||
// System.out.println(line); | |||
// // result += line+"\n"; | |||
// } | |||
// in.close(); | |||
} catch (Exception ex){ | |||
ex.printStackTrace(); | |||
} | |||
return cookie; | |||
} | |||
private void addDict(String dict){ | |||
String str=dict.trim(); | |||
if (dict.startsWith("/")){ | |||
str=dict.substring(1); | |||
}else if (dict.indexOf("#")>0){ | |||
str=dict.substring(0,dict.indexOf("#")); | |||
}else if (dict.indexOf("?")>0){ | |||
str=dict.substring(0,dict.indexOf("?")); | |||
}else if (dict.startsWith("./")){ | |||
str=dict.substring(2); | |||
}else if (dict.startsWith("../")){ | |||
str=dict.substring(3); | |||
} | |||
if (str.toLowerCase().indexOf("team")>0) return; | |||
if (str.trim().length()==0) return; | |||
if (str.trim().length()>=256) return; | |||
//../CommandLine | |||
if (oldDict.contains(str)) return; | |||
if (addDict.contains(str)) return; | |||
addDict.add(str); | |||
} | |||
private void delDict(int dictindex){ | |||
oldDict.add(addDict.get(dictindex)); | |||
addDict.remove(dictindex); | |||
} | |||
private void clearDict(){ | |||
addDict.clear(); | |||
oldDict.clear(); | |||
} | |||
private void getDicts(String html){ | |||
Pattern pattern= Pattern.compile("\\[UbuntuHelp:(.*?)\\]"); | |||
Matcher matcher=pattern.matcher(html); | |||
while(matcher.find()) { | |||
String line=matcher.group(1); | |||
if (line.indexOf("|")>0){ | |||
addDict(line.substring(0,line.indexOf("|"))); | |||
}else{ | |||
addDict(line); | |||
} | |||
} | |||
} | |||
/**
 * Creates a new instance of Main and reconfigures HTTPS handling for the whole JVM.
 *
 * SECURITY NOTE: this installs a trust manager and a hostname verifier that
 * accept every certificate and every host name as the process-wide defaults,
 * so HTTPS connections made afterwards are not authenticated.
 */
public Main() {
    try {
        SSLContext sslContext = SSLContext.getInstance("TLS");
        sslContext.init(null, new X509TrustManager[] { xtm }, new java.security.SecureRandom());
        // Only install the factory once the context is fully initialised.
        HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
    } catch(GeneralSecurityException gse) {
        // Report instead of swallowing; the JVM's default socket factory stays in place.
        gse.printStackTrace();
    }
    HttpsURLConnection.setDefaultHostnameVerifier(hnv);
}
private X509TrustManager xtm = new X509TrustManager() { | |||
public void checkClientTrusted(X509Certificate[] chain, String authType) {} | |||
public void checkServerTrusted(X509Certificate[] chain, String authType) {} | |||
public X509Certificate[] getAcceptedIssuers() { | |||
return null; | |||
} | |||
}; | |||
private HostnameVerifier hnv = new HostnameVerifier() { | |||
public boolean verify(String hostname, SSLSession session) { | |||
return true; | |||
} | |||
}; | |||
public String getUrl(String urladdress,String dict) throws IOException{ | |||
URL url=new URL(urladdress); | |||
HttpURLConnection httpConn = (HttpURLConnection) url.openConnection(); | |||
httpConn.setReadTimeout(60000); | |||
httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)"); | |||
httpConn.setRequestProperty("Content-Language","UTF-8" ); | |||
httpConn.setRequestProperty("Connection", "Keep-Alive"); | |||
InputStream in=httpConn.getInputStream(); | |||
BufferedReader read=new BufferedReader(new InputStreamReader(in)); | |||
StringBuffer sb=new StringBuffer(); | |||
while (true){ | |||
String line=read.readLine(); | |||
if (line==null) break; | |||
sb.append(line+"\r\n"); | |||
} | |||
return moin2wm(sb.toString(),urladdress,dict); | |||
} | |||
public String moin2wm(String html,String url,String dict) throws UnsupportedEncodingException{ | |||
String text=html; | |||
String ex="UbuntuHelp"; | |||
String turl="https://help.ubuntu.com/community/"; | |||
if (url.startsWith("https://wiki")){ | |||
ex="UbuntuWiki"; | |||
turl="https://wiki.ubuntu.com/"; | |||
} | |||
String head="{{From|"+url.substring(0,url.indexOf("?"))+"}}\r\n{{Languages|"+ex+":"+dict+"}}\r\n"; | |||
//删除注释 | |||
text=text.replaceAll("\r\n##(.*)",""); | |||
text=text.replaceAll("\r\n#format(.*)",""); | |||
text=text.replaceAll("\r\n#language(.*)",""); | |||
text=text.replaceAll("\r\n#pragma(.*)",""); | |||
text=text.replaceAll("\r\n#acl(.*)",""); | |||
text=text.replaceAll("^##(.*)\r\n",""); | |||
text=text.replaceAll("^#format(.*)\r\n",""); | |||
text=text.replaceAll("^#language(.*)\r\n",""); | |||
text=text.replaceAll("^#pragma(.*)\r\n",""); | |||
text=text.replaceAll("^#acl(.*)\r\n",""); | |||
//替换#REDIRECT PDFPrinting => #REDIRECT [[PDFPrinting]] | |||
text=text.replaceAll("#REDIRECT (\\S*)","#REDIRECT "+"[["+ex+":$1]]"); | |||
text=text.replaceAll("#redirect (\\S*)","#REDIRECT "+"[["+ex+":$1]]"); | |||
//#refresh 0 https://wiki.ubuntu.com/ASUS_A3H_5010_Laptop_with_Ubuntu | |||
text=text.replaceAll("#REFRESH (.*?) (\\S*)","#REDIRECT "+"[["+ex+":$2]]"); | |||
text=text.replaceAll("#refresh (.*?) (\\S*)","#REDIRECT "+"[["+ex+":$2]]"); | |||
//删除主题 | |||
text=text.replaceAll(".*TableOfContents.*",""); | |||
//标题从二开始 | |||
text=text.replaceAll("= (.*?) =","== $1 =="); | |||
//转化List | |||
text=replaceList(text); | |||
//[[BR]] -> <BR> | |||
text=text.replaceAll("\\[\\[BR\\]\\]","<br>"); | |||
//link convert superscripted - ^ * ^ -> <sup> * </sup> | |||
text=text.replaceAll("\\^(.*)\\^","<sup>$1</sup>"); | |||
//link convert subscripted - ,, * ,, -> <sub> * </sub> | |||
text=text.replaceAll(",,(.*?),,","<sub>$1</sub>"); | |||
//link convert - [" * "] -> [[ UbuntuHelp: * ]] | |||
text=text.replaceAll("\\[\"(.*?)\"\\]","[["+ex+":$1]]"); | |||
//link convert - [# * ] -> [[ * ]] | |||
text=text.replaceAll("\\[#(.*?)\\]","[[$1]]"); | |||
//link convert - [: / * : * ] -> [[ UbuntuHelp:dict\ * | * ]] | |||
text=text.replaceAll("\\[:/(.*?):(.*?)\\]","[["+ex+":"+dict+"/$1|$2]]"); | |||
//link convert - [: * : * ] -> [[ UbuntuHelp: * | * ]] | |||
text=text.replaceAll("\\[:(.*?):(.*?)\\]","[["+ex+":$1|$2]]"); | |||
//link convert - [: / * ] -> [[ UbuntuHelp: dict * ]] | |||
text=text.replaceAll("\\[:/(.*?)\\]","[["+ex+":"+dict+"/$1]]"); | |||
//link convert - [: * ] -> [[ UbuntuHelp: * ]] | |||
text=text.replaceAll("\\[:(.*?)\\]","[["+ex+":$1]]"); | |||
//link convert - wiki:cat -> [[UbuntuWiki:cat]] | |||
text=text.replaceAll(" wiki:(\\S*)"," [[UbuntuWiki:$1]]"); | |||
text=text.replaceAll("\r\nwiki:(\\S*)","\r\n[[UbuntuWiki:$1]]"); | |||
//link convert - [wiki:cat * ] -> [[UbuntuWiki:cat| * ]] | |||
text=text.replaceAll("\\[wiki:(.*?)\\ (.*?)\\]","[[UbuntuWiki:$1|$2]]"); | |||
//link convert - [wiki:cat * ] -> [[UbuntuWiki:cat| * ]] | |||
text=text.replaceAll("\\[wiki:(.*?)\\]","[[UbuntuWiki:$1]]"); | |||
//link convert - [UbuntuWiki:\*] -> [UbuntuWiki:dict\*] | |||
text=text.replaceAll("\\[UbuntuWiki:\\\\(.*?)\\]","[UbuntuWiki:"+dict+"\\$1]"); | |||
//link convert - [UbuntuHelp:\*] -> [UbuntuHelp:dict\*] | |||
text=text.replaceAll("\\[UbuntuHelp:\\\\(.*?)\\]","[UbuntuHelp:"+dict+"\\$1]"); | |||
//link convert - __ * __ -> <u> * </u> | |||
text=text.replaceAll("__(.*?)__","<u>$1</u>"); | |||
//link convert - {{{ * }}} -> <code><nowiki> * </nowiki></code> | |||
text=text.replaceAll("\\{\\{\\{(.*?)\\}\\}\\}","<code><nowiki>$1</nowiki></code>"); | |||
//link convert - \r\n {{{ * -> \r\n<pre><nowiki> * | |||
text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","\r\n<pre><nowiki>$2"); | |||
//link convert - {{{ * -> <pre><nowiki> * | |||
text=text.replaceAll("\\{\\{\\{(.*)","\r\n<pre><nowiki>$1"); | |||
//link convert - * }}} -> * <\pre><\nowiki> | |||
text=text.replaceAll("(.*?)\\}\\}\\}","$1</nowiki></pre>"); | |||
//CategoryHomepage =>[[category:"+ex+"]]; | |||
text=text.replaceAll("Category(\\S*)","[[category:Category$1]]"); | |||
text=text.replaceAll("\r\n( *)","\r\n"); | |||
text=replaceUrl(text,turl,dict); | |||
text=tableConv(text); | |||
//[[xxx:http]] => [[http:]] | |||
text=text.replaceAll("\\[\\[(.*?):http(.*?)\\]\\]","[[http$2]]"); | |||
//[[xxx:ftp]] => [[ftp:]] | |||
text=text.replaceAll("\\[\\[(.*?):ftp(.*?)\\]\\]","[[ftp$2]]"); | |||
//[[https://wiki.ubuntu.com/*]] => [[UbuntuWiki:]] | |||
text=text.replaceAll("\\[\\[https://wiki\\.ubuntu\\.com/(.*?)\\]\\]","[[UbuntuWiki:$1]]"); | |||
//[[https://help.ubuntu.com/community/*]] => [[UbuntuHelp:]] | |||
text=text.replaceAll("\\[\\[https://help\\.ubuntu\\.com/community/(.*?)\\]\\]","[[UbuntuHelp:$1]]"); | |||
//[[http://wiki.ubuntu.com/*]] => [[UbuntuWiki:]] | |||
text=text.replaceAll("\\[\\[http://wiki\\.ubuntu\\.com/(.*?)\\]\\]","[[UbuntuWiki:$1]]"); | |||
//[[http://help.ubuntu.com/community/*]] => [[UbuntuHelp:]] | |||
text=text.replaceAll("\\[\\[http://help\\.ubuntu\\.com/community/(.*?)\\]\\]","[[UbuntuHelp:$1]]"); | |||
String foot="\r\n[[category:"+ex+"]]"; | |||
if (text.trim().startsWith("#REDIRECT")){ | |||
System.out.print(dict + " is redirect :"+text.trim()); | |||
return text+head+foot; | |||
} | |||
if (text.trim().length()<10){ | |||
return ""; | |||
} | |||
return head+text+foot; | |||
} | |||
public String replaceList(String text){ | |||
text=text.replaceAll("\r\n \\. (.*?)","\r\n* $1"); | |||
text=text.replaceAll("\r\n \\. (.*?)","\r\n** $1"); | |||
text=text.replaceAll("\r\n \\. (.*?)","\r\n*** $1"); | |||
text=text.replaceAll("\r\n \\. (.*?)","\r\n**** $1"); | |||
text=text.replaceAll("\r\n \\. (.*?)","\r\n***** $1"); | |||
text=text.replaceAll("\r\n \\. (.*?)","\r\n****** $1"); | |||
text=text.replaceAll("\r\n \\*(.*?)","\r\n*$1"); | |||
text=text.replaceAll("\r\n \\*(.*?)","\r\n**$1"); | |||
text=text.replaceAll("\r\n \\*(.*?)","\r\n***$1"); | |||
text=text.replaceAll("\r\n \\*(.*?)","\r\n****$1"); | |||
text=text.replaceAll("\r\n \\*(.*?)","\r\n*****$1"); | |||
text=text.replaceAll("\r\n \\*(.*?)","\r\n******$1"); | |||
// text=text.replaceAll("\r\n \\. (.*?)","\r\n#: $1"); | |||
// text=text.replaceAll("\r\n \\. (.*?)","\r\n##: $1"); | |||
// text=text.replaceAll("\r\n \\. (.*?)","\r\n###: $1"); | |||
// text=text.replaceAll("\r\n \\. (.*?)","\r\n####: $1"); | |||
// text=text.replaceAll("\r\n \\. (.*?)","\r\n#####: $1"); | |||
// text=text.replaceAll("\r\n \\. (.*?)","\r\n######: $1"); | |||
// text=text.replaceAll("\r\n \\*(.*?)","\r\n#$1"); | |||
// text=text.replaceAll("\r\n \\*(.*?)","\r\n##$1"); | |||
// text=text.replaceAll("\r\n \\*(.*?)","\r\n###$1"); | |||
// text=text.replaceAll("\r\n \\*(.*?)","\r\n####$1"); | |||
// text=text.replaceAll("\r\n \\*(.*?)","\r\n#####$1"); | |||
// text=text.replaceAll("\r\n \\*(.*?)","\r\n######$1"); | |||
for (int i=1;i<20;i++){ | |||
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n#$1"); | |||
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n##$1"); | |||
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n###$1"); | |||
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n####$1"); | |||
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n#####$1"); | |||
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n######$1"); | |||
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n*$1"); | |||
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n**$1"); | |||
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n***$1"); | |||
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n****$1"); | |||
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n*****$1"); | |||
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n******$1"); | |||
} | |||
return text; | |||
} | |||
public String replaceUrl(String text,String baseurl,String dict) throws UnsupportedEncodingException{ | |||
//attachment:IconsPage/info.png -> https://help.ubuntu.com/community/IconsPage?action=AttachFile&do=get&target=info.png | |||
while (true){ | |||
Pattern pattern= Pattern.compile("attachment:(.*?)/(\\S*)"); | |||
Matcher matcher=pattern.matcher(text); | |||
String replace; | |||
if (matcher.find()) { | |||
replace=baseurl+matcher.group(1)+"?action=AttachFile&do=get&target="+URLEncoder.encode(matcher.group(2), "UTF-8"); | |||
text=text.substring(0,matcher.start(0))+replace+text.substring(matcher.end(0)); | |||
continue; | |||
} | |||
pattern= Pattern.compile("attachment:(\\S*)"); | |||
matcher=pattern.matcher(text); | |||
if(matcher.find()) { | |||
replace=baseurl+dict+"?action=AttachFile&do=get&target="+URLEncoder.encode(matcher.group(1), "UTF-8"); | |||
text=text.substring(0,matcher.start(0))+replace+text.substring(matcher.end(0)); | |||
continue; | |||
} | |||
break; | |||
} | |||
return text; | |||
} | |||
public String tableConv(String html){ | |||
//||a||b||c|| -> {| | |||
//||d||e||f|| |a||b||c | |||
// |- | |||
// |d||e||f | |||
// |} | |||
String[] lines=html.split("\r\n"); | |||
String block = ""; | |||
StringBuffer bf= new StringBuffer(); | |||
boolean start=false; | |||
for (int i=0;i<lines.length;i++){ | |||
String line=lines[i].trim(); | |||
if (line.startsWith("||")){ | |||
if (! start){ | |||
start=true; | |||
String str=line.substring(1,line.length()-2); | |||
str=str.replaceAll("<bgcolor.*?>",""); | |||
str=str.replaceAll("<style.*?>",""); | |||
str=str.replaceAll("<rowbgcolor.*?>",""); | |||
block="{|border=\"1\" cellspacing=\"0\"\r\n"+str; | |||
}else{ | |||
String str=line.substring(1,line.length()-2); | |||
str=str.replaceAll("<bgcolor.*?>",""); | |||
str=str.replaceAll("<style.*?>",""); | |||
str=str.replaceAll("<rowbgcolor.*?>",""); | |||
block=block+"\r\n|-\r\n"+str; | |||
} | |||
}else{ | |||
if (start){ | |||
block=block+"\r\n|}\r\n"; | |||
bf.append(block); | |||
start=false; | |||
} | |||
bf.append(lines[i]+"\r\n"); | |||
} | |||
} | |||
if (start){ | |||
block=block+"\r\n|}\r\n"; | |||
bf.append(block); | |||
start=false; | |||
} | |||
return bf.toString(); | |||
} | |||
public void putText(String dict,String html,String surl) throws MalformedURLException, IOException{ | |||
URL url; | |||
URLConnection conn; | |||
InputStream in; | |||
BufferedReader read; | |||
StringBuffer sb; | |||
if (html.length()<5){ | |||
System.out.print(" is short:"+html); | |||
return; | |||
} | |||
// if (html.length()<300){ | |||
// if (html.toUpperCase().trim().indexOf("REFRESH")>0) { | |||
// System.out.println(dict+" is REFRESH."); | |||
// return; | |||
// } | |||
// if (html.toUpperCase().trim().indexOf("REDIRECT")>0) { | |||
// System.out.println(dict+" is REDIRECT."); | |||
// return; | |||
// } | |||
// } | |||
String ex="UbuntuHelp"; | |||
if (surl.startsWith("https://wiki")){ | |||
ex="UbuntuWiki"; | |||
} | |||
try{ | |||
url=new URL("http://wiki.ubuntu.org.cn/"+ex+":"+dict+"?action=raw"); | |||
conn = url.openConnection(); | |||
conn.setReadTimeout(60000); | |||
conn.setRequestProperty("Cookie", cookie); | |||
in=conn.getInputStream(); | |||
read=new BufferedReader(new InputStreamReader(in)); | |||
sb=new StringBuffer(); | |||
while (true){ | |||
String line=read.readLine(); | |||
if (line==null) break; | |||
sb.append(line+"\r\n"); | |||
} | |||
if (sb.toString().trim().length()==html.trim().length()) { | |||
System.out.print(" no changes"); | |||
return; | |||
} | |||
}catch(Exception ex0){ | |||
//nothing | |||
} | |||
url=new URL("http://wiki.ubuntu.org.cn/"+ex+":"+dict+"?action=edit"); | |||
conn = url.openConnection(); | |||
conn.setReadTimeout(60000); | |||
conn.setRequestProperty("Connection", "Keep-Alive"); | |||
conn.setRequestProperty("Cookie", cookie); | |||
conn.setRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)"); | |||
conn.setRequestProperty("Accept","text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"); | |||
in=conn.getInputStream(); | |||
read=new BufferedReader(new InputStreamReader(in)); | |||
sb=new StringBuffer(); | |||
while (true){ | |||
String line=read.readLine(); | |||
if (line==null) break; | |||
sb.append(line+"\r\n"); | |||
} | |||
int start=sb.indexOf("<form id=\"editform\""); | |||
int end=sb.indexOf("<div class=\"printfooter\">"); | |||
String from; | |||
try{ | |||
from=sb.substring(start,end); | |||
}catch(Exception ex0){ | |||
System.out.println(dict+" error,please set cookie!"); | |||
System.out.println(sb); | |||
return; | |||
} | |||
Map<String,String> map=getPostDate(from); | |||
Iterator<String> iterator=map.keySet().iterator(); | |||
url = new URL("http://wiki.ubuntu.org.cn/index.php?title="+ex+":"+URLEncoder.encode(dict, "UTF-8")+"&action=submit"); | |||
HttpURLConnection conn2 = (HttpURLConnection) url.openConnection(); | |||
String boundary="---------------------------167593640336579986891120154"; | |||
conn2.setReadTimeout(60000); | |||
conn2.setDoOutput(true); | |||
conn2.setAllowUserInteraction(false); | |||
conn2.setRequestMethod("POST"); | |||
conn2.setRequestProperty("Cookie", cookie); | |||
conn2.setRequestProperty("Content-Type", "multipart/form-data; boundary="+boundary); | |||
conn2.setRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)"); | |||
conn2.setRequestProperty("Accept","text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"); | |||
conn2.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5"); | |||
conn2.setRequestProperty("Referer", "http://wiki.ubuntu.org.cn/index.php?title=UbuntuHelp:"+URLEncoder.encode(dict, "UTF-8")+"&action=edit"); | |||
conn2.setRequestProperty("Accept-Charset", "UTF-8,*"); | |||
conn2.setRequestProperty("Connection", "Keep-Alive"); | |||
StringBuffer content=new StringBuffer(); | |||
while (iterator.hasNext()){ | |||
String name=iterator.next(); | |||
if (name.equals("wpPreview")) continue; | |||
if (name.equals("wpDiff")) continue; | |||
if (name.equals("wpWatchthis")) continue; | |||
String value=""; | |||
if (name.equals("wpTextbox1")){ | |||
value=html; | |||
}else { | |||
value=map.get(name); | |||
} | |||
content.append("--"+boundary+"\r\n"); | |||
content.append("Content-Disposition: form-data; name=\""+name+"\"\r\n\r\n"); | |||
content.append(value+"\r\n"); | |||
} | |||
content.append("--"+boundary+"--\r\n\r\n"); | |||
byte[] data=content.toString().getBytes(); | |||
conn2.setRequestProperty("Content-Length", String.valueOf(data.length)); | |||
OutputStream out = conn2.getOutputStream(); | |||
out.write(data); | |||
out.flush(); | |||
// Get the response | |||
try{ | |||
BufferedReader rd = new BufferedReader(new InputStreamReader(conn2.getInputStream())); | |||
String line; | |||
while ((line = rd.readLine()) != null) { | |||
//System.out.println(line); | |||
// Nothing | |||
break; | |||
} | |||
rd.close(); | |||
}catch(Exception ex0){ | |||
System.out.println(" add. but has a error:"+ex0.getMessage()); | |||
out.close(); | |||
return; | |||
} | |||
out.close(); | |||
// System.out.println(dict+" add."); | |||
}; | |||
private Map<String,String> getPostDate(String from){ | |||
Map<String,String> map=new HashMap<String,String>(); | |||
Pattern pattern= Pattern.compile("<input(.*?)/>"); | |||
Matcher matcher=pattern.matcher(from); | |||
Pattern pname=Pattern.compile("name=[\"|'](.*?)[\"|']"); | |||
Pattern pvalue=Pattern.compile("value=[\"|'](.*?)[\"|']"); | |||
while(matcher.find()) { | |||
String name,value; | |||
String input=matcher.group(1); | |||
Matcher mname=pname.matcher(input); | |||
Matcher mvalue=pvalue.matcher(input); | |||
if (mname.find()){ | |||
name=mname.group(1); | |||
}else{ | |||
continue; | |||
}; | |||
if (mvalue.find()){ | |||
value=mvalue.group(1); | |||
}else{ | |||
if (input.indexOf("checkbox")>0){ | |||
value="0"; | |||
}else{ | |||
value=""; | |||
} | |||
}; | |||
map.put(name,value); | |||
} | |||
// int start=from.indexOf("cols='80' style=\"width:100%\" >"); | |||
// int end=from.indexOf("</textarea>"); | |||
// | |||
// map.put("wpTextbox1",from.substring(start,end)); | |||
map.put("wpTextbox1",""); | |||
return map; | |||
} | |||
public void helpstart() throws IOException{ | |||
clearDict(); | |||
// String dict="community/"; | |||
// String out = getUrl("https://help.ubuntu.com/"+dict+"?action=raw",dict); | |||
// getDicts(out); | |||
// putText(dict,out,"https://help.ubuntu.com/community/"); | |||
InputStream in=null; | |||
File saveFile=new File("/tmp/helpindex.html"); | |||
if (saveFile.exists()){ | |||
in=new FileInputStream(saveFile); | |||
}else{ | |||
URL url=new URL("https://help.ubuntu.com/community/TitleIndex"); | |||
in=url.openConnection().getInputStream(); | |||
} | |||
BufferedReader read=new BufferedReader(new InputStreamReader(in)); | |||
StringBuffer sb=new StringBuffer(); | |||
while (true){ | |||
String line=read.readLine(); | |||
if (line==null) break; | |||
sb.append(line+"\r\n"); | |||
} | |||
read.close(); | |||
if (!saveFile.exists()){ | |||
FileOutputStream out=new FileOutputStream(saveFile); | |||
out.write(sb.toString().getBytes()); | |||
out.flush(); | |||
out.close(); | |||
} | |||
// URL url=new URL("https://help.ubuntu.com/community/TitleIndex"); | |||
// InputStream in=url.openConnection().getInputStream(); | |||
// BufferedReader read=new BufferedReader(new InputStreamReader(in)); | |||
// StringBuffer sb=new StringBuffer(); | |||
// while (true){ | |||
// String line=read.readLine(); | |||
// if (line==null) break; | |||
// sb.append(line+"\r\n"); | |||
// } | |||
String html=sb.substring(sb.indexOf("<a name=\"3\">"),sb.indexOf("<a name=\"%5b\">")); | |||
Pattern pattern= Pattern.compile("<a href=\"/community/(.*?)\">"); | |||
Matcher matcher=pattern.matcher(html); | |||
while(matcher.find()) { | |||
String input=matcher.group(1); | |||
addDict(input); | |||
System.out.println(input); | |||
} | |||
String dict; | |||
String out; | |||
System.out.println("一共需要转换 "+String.valueOf(addDict.size())+" 篇文章。"); | |||
while (addDict.size()>0){ | |||
// if (addDict.size()==0) break; | |||
// dict=addDict.get(addDict.size()-1); | |||
dict=addDict.get(0); | |||
System.out.print(String.valueOf(addDict.size())+" "+dict); | |||
try{ | |||
// if (addDict.size()>1500) continue; | |||
try { | |||
System.out.print(" read"); | |||
out=getUrl("https://help.ubuntu.com/community/"+dict+"?action=raw",dict); | |||
System.out.print(" ."); | |||
}catch(Exception ex){ | |||
try { | |||
System.out.print(" read again"); | |||
out=getUrl("https://help.ubuntu.com/community/"+dict+"?action=raw",dict); | |||
System.out.print(" ."); | |||
}catch(Exception ex2){ | |||
continue; | |||
} | |||
} | |||
try { | |||
System.out.print(" get dict"); | |||
getDicts(out); | |||
System.out.print(" . put"); | |||
putText(dict,out,"https://help.ubuntu.com/community/"+dict); | |||
System.out.print(" .\r\n"); | |||
}catch(Exception ex){ | |||
System.out.println(dict+" error:"+ex.getMessage()); | |||
continue; | |||
} | |||
}finally{ | |||
delDict(0); | |||
} | |||
} | |||
} | |||
public void wikistart() throws MalformedURLException, IOException{ | |||
clearDict(); | |||
InputStream in=null; | |||
File saveFile=new File("/tmp/wikiindex.html"); | |||
if (saveFile.exists()){ | |||
in=new FileInputStream(saveFile); | |||
}else{ | |||
URL url=new URL("https://wiki.ubuntu.com/TitleIndex"); | |||
in=url.openConnection().getInputStream(); | |||
} | |||
BufferedReader read=new BufferedReader(new InputStreamReader(in)); | |||
StringBuffer sb=new StringBuffer(); | |||
while (true){ | |||
String line=read.readLine(); | |||
if (line==null) break; | |||
sb.append(line+"\r\n"); | |||
} | |||
read.close(); | |||
if (!saveFile.exists()){ | |||
FileOutputStream out=new FileOutputStream(saveFile); | |||
out.write(sb.toString().getBytes()); | |||
out.flush(); | |||
out.close(); | |||
} | |||
String html=sb.substring(sb.indexOf("<a name=\"0\">"),sb.indexOf("<a name=\"%5b\">")); | |||
Pattern pattern= Pattern.compile("<a href=\"/(.*?)\">"); | |||
Matcher matcher=pattern.matcher(html); | |||
while(matcher.find()) { | |||
String input=matcher.group(1); | |||
addDict(input); | |||
System.out.println(input); | |||
} | |||
String dict; | |||
String out; | |||
System.out.println("一共需要转换 "+String.valueOf(addDict.size())+" 篇文章。"); | |||
while (addDict.size()>0){ | |||
dict=addDict.get(0); | |||
System.out.print(String.valueOf(addDict.size())+" "+dict); | |||
try{ | |||
// if (addDict.size()>12285) continue; | |||
try { | |||
System.out.print(" read"); | |||
out=getUrl("https://wiki.ubuntu.com/"+dict+"?action=raw",dict); | |||
System.out.print(" ."); | |||
}catch(Exception ex){ | |||
ex.printStackTrace(); | |||
try { | |||
System.out.print(" read again"); | |||
out=getUrl("https://wiki.ubuntu.com/"+dict+"?action=raw",dict); | |||
System.out.print(" ."); | |||
}catch(Exception ex2){ | |||
ex2.printStackTrace(); | |||
continue; | |||
} | |||
} | |||
try { | |||
System.out.print(" get dict"); | |||
getDicts(out); | |||
System.out.print(" . put"); | |||
putText(dict,out,"https://wiki.ubuntu.com/"+dict); | |||
System.out.print(" .\r\n"); | |||
}catch(Exception ex){ | |||
System.out.println(dict+" error:"+ex.getMessage()); | |||
} | |||
}finally{ | |||
delDict(0); | |||
} | |||
} | |||
} | |||
public void test() throws IOException{ | |||
// String sss="\r\ndd attachment:IconsPage/info.png ClamAV can only\r\n"; | |||
// sss=sss.replaceAll("attachment:(.*?)/(.*?) ","https://help.ubuntu.com/community/$1?action=AttachFile&do=get&target=$2 "); | |||
// System.out.println(sss); | |||
// String dict="RestrictedFormats"; | |||
// String out=getUrl("https://help.ubuntu.com/community/"+dict+"?action=raw",dict); | |||
// System.out.println(out); | |||
// String text="d CategoryHome dd"; | |||
// text=text.replaceAll("Category(\\S*)","[[category:Category$1]]"); | |||
// System.out.println(text); | |||
// String out=tableConv("dddd\r\n||xxx||nnn||ddd||\r\n||dd||xxdee||dd||\r\nxdd"); | |||
// System.out.println(out); | |||
// String text="#title User Documentation\r\n##Please discuss major/structural changes to this page on the Documentation team mailing list at: http://lists.ubuntu.com/mailman/listinfo/ubuntu-doc\r\n##If you want to get involved with editing and organising the Wiki please visit DocumentationTeam.\r\n## This page is designed to remain mostly static - make and propose changes to the pages that are linked to from this page\r\n## For help on contributing to the wiki, see the WikiGuide\r\n||<tablestyl"; | |||
// text=text.replaceAll("\r\n#(.*)",""); | |||
// text=text.replaceAll("^#(.*?)\r\n",""); | |||
// System.out.println(text); | |||
// String text="sss\r\ndddf{{{dxx\r\n}}}\r\n . {{{ddd}}}\r\n .{{{ddd}}}\r\n{{{de}}}"; | |||
// //link convert - \r\n {{{ * -> \r\n<pre><nowiki> * | |||
// text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","\r\n<pre><nowiki>$2"); | |||
//link convert - {{{ * -> <pre><nowiki> * | |||
// System.out.println(text); | |||
// text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","<pre><nowiki>$2"); | |||
//link convert - {{{ * -> <pre><nowiki> * | |||
// text=text.replaceAll("\\{\\{\\{(.*)","\r\n<pre><nowiki>$1"); | |||
// //link convert - * }}} -> * <\pre><\nowiki> | |||
// text=text.replaceAll("(.*?)\\}\\}\\}","$1</nowiki></pre>"); | |||
// | |||
// System.out.println(text); | |||
// getCookie(); | |||
// String url="http://bingniu.3322.org/mywiki/OpenLDAPAdminGuide/SecurityConsideration"; | |||
// String out=getUrl(url+"?action=raw","UbuntuManual"); | |||
// System.out.println(out); | |||
// BufferedReader read=new BufferedReader(new FileReader("/home/wangpian/a1.txt")); | |||
// String s=""; | |||
// StringBuffer str=new StringBuffer(); | |||
// while (true){ | |||
// s=read.readLine(); | |||
// if (s==null)break; | |||
// str.append(s+"\r\n"); | |||
// } | |||
// s=str.toString(); | |||
// s=s.replaceAll("\\[\\[\\[.*?\\]\\]\\]",""); | |||
// | |||
// System.out.println(s.replaceAll("\\[\\[UbuntuHelp(.*?)\\|(.*?)\\]\\]","$2")); | |||
} | |||
/** | |||
* @param args the command line arguments | |||
*/ | |||
public static void main(String[] args) { | |||
// TODO code application logic here | |||
Main main=new Main(); | |||
try { | |||
// main.test(); | |||
// main.helpstart(); | |||
main.wikistart(); | |||
}catch (Exception ex){ | |||
ex.printStackTrace(); | |||
} | |||
} | |||
} | |||
</nowiki> |
2007年11月23日 (五) 16:21的版本
机器人,自动将 http://help.ubuntu.com 和 http://wiki.ubuntu.com 由 MoinMoin 格式转换到 MediaWiki 格式,并自动更新和发布的小程序。 由 Java 写成。
/*
* Main.java
*
* Created on 2007年5月12日, 下午1:31
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package wiki;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.GeneralSecurityException;
import java.security.Security;
import java.security.cert.X509Certificate;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;
/**
*
* @author oneleaf
*/
public class Main {
// Page names waiting to be processed; filled by addDict(String), drained by delDict(int).
List <String> addDict= new Vector<String>();
// Page names already handled; delDict(int) moves entries here so addDict(String) skips them.
List <String> oldDict= new Vector<String>();
// Cookie header value obtained by logging in via getCookie() while the instance is being constructed.
String cookie=getCookie();
/**
 * Logs in to wiki.ubuntu.org.cn as the bot account and collects the cookies
 * the server hands out.
 *
 * The login page is requested first to obtain the initial Set-Cookie header;
 * the credentials are then POSTed with that cookie attached, and every
 * Set-Cookie value of the login response is appended, separated by "; ".
 *
 * @return the accumulated cookie string; empty if nothing could be obtained.
 *         Any exception is printed and whatever was collected so far is returned.
 */
private String getCookie(){
    String cookie = "";
    try{
        // Step 1: fetch the login form only to pick up the initial cookie.
        URL httpurl = new URL("http://wiki.ubuntu.org.cn/index.php?title=Special:Userlogin");
        HttpURLConnection httpConn = (HttpURLConnection)httpurl.openConnection();
        httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
        httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        httpConn.setRequestProperty("Connection", "Keep-Alive");
        httpConn.setUseCaches(false);
        String initial=httpConn.getHeaderField("Set-Cookie");
        if (initial!=null){
            // getHeaderField returns null when the header is absent; never store that.
            cookie=initial;
        }

        // Step 2: submit the login form.
        // NOTE(review): credentials are hard-coded in source; consider loading them from configuration.
        String data="wpName=wikibot&wpPassword=********&wpRemember=1";
        byte[] payload=data.getBytes("UTF-8");
        httpurl = new URL("http://wiki.ubuntu.org.cn/index.php?title=Special:Userlogin&action=submitlogin&type=login");
        httpConn = (HttpURLConnection)httpurl.openConnection();
        httpConn.setRequestMethod("POST");
        if (cookie.length()>0){
            httpConn.addRequestProperty("Cookie",cookie);
        }
        httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
        httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        httpConn.setRequestProperty("Content-Language","UTF-8" );
        httpConn.setRequestProperty("Content-Length", ""+payload.length);
        httpConn.setRequestProperty("Connection", "Keep-Alive");
        httpConn.setDoOutput(true);
        httpConn.setDoInput(true);
        httpConn.setUseCaches(false);
        DataOutputStream outStream = new DataOutputStream(httpConn.getOutputStream());
        try{
            outStream.write(payload);
            outStream.flush();
        }finally{
            // Close even if the write fails so the connection's stream is not leaked.
            outStream.close();
        }

        // Step 3: append every cookie set by the login response.
        List <String> list=httpConn.getHeaderFields().get("Set-Cookie");
        if (list!=null){
            for (String value : list){
                if (value==null) continue;
                cookie = cookie.length()==0 ? value : cookie+"; "+value;
            }
        }
    } catch (Exception ex){
        // Best effort: report the problem and fall through with what we have.
        ex.printStackTrace();
    }
    return cookie;
}
/**
 * Queues a page name for processing unless it is filtered out or already known.
 *
 * The name is trimmed, one leading path marker ("../", "./" or "/") is
 * removed, and anything from a '#' or '?' onwards is cut off. Both steps are
 * applied to the trimmed value, so e.g. " ../CommandLine#top" becomes
 * "CommandLine".
 *
 * @param dict raw link target as found in the page text
 */
private void addDict(String dict){
    String str=dict.trim();

    // Strip a single leading path marker, e.g. ../CommandLine -> CommandLine
    if (str.startsWith("../")){
        str=str.substring(3);
    }else if (str.startsWith("./")){
        str=str.substring(2);
    }else if (str.startsWith("/")){
        str=str.substring(1);
    }

    // Cut off an anchor or query string. A marker at index 0 is left alone,
    // matching the original ">0" checks.
    int cut=str.indexOf("#");
    if (cut>0){
        str=str.substring(0,cut);
    }
    cut=str.indexOf("?");
    if (cut>0){
        str=str.substring(0,cut);
    }
    str=str.trim();

    // NOTE(review): ">0" means a name that *starts* with "team" is not filtered - confirm this is intended.
    if (str.toLowerCase().indexOf("team")>0) return;
    if (str.length()==0) return;
    if (str.length()>=256) return;
    // Skip names that were already processed or are already queued.
    if (oldDict.contains(str)) return;
    if (addDict.contains(str)) return;
    addDict.add(str);
}
/**
 * Marks the queued entry at the given position as handled: it is taken out
 * of the pending list and recorded in the processed list.
 *
 * @param dictindex index into the pending list
 */
private void delDict(int dictindex){
    // List.remove(int) hands back the removed element, so one call does both jobs.
    oldDict.add(addDict.remove(dictindex));
}
/** Empties both the processed list and the pending list. */
private void clearDict(){
    oldDict.clear();
    addDict.clear();
}
/**
 * Scans converted page text for [UbuntuHelp:...] links and queues each target
 * via addDict(String). For a piped link only the part before the '|' is used.
 *
 * @param html page text to scan
 */
private void getDicts(String html){
    Matcher links=Pattern.compile("\\[UbuntuHelp:(.*?)\\]").matcher(html);
    while (links.find()){
        String target=links.group(1);
        int bar=target.indexOf("|");
        // A '|' at position 0 leaves no name in front of it, so the whole text is passed on.
        addDict(bar>0 ? target.substring(0,bar) : target);
    }
}
/**
 * Creates a new instance of Main and installs JVM-wide HTTPS defaults: a TLS
 * socket factory backed by the xtm trust manager and the hnv hostname verifier.
 *
 * If the TLS context cannot be created or initialised, the failure is
 * reported and the default socket factory is left untouched.
 */
public Main() {
    SSLContext sslContext = null;
    try {
        sslContext = SSLContext.getInstance("TLS");
        X509TrustManager[] xtmArray = new X509TrustManager[] { xtm };
        sslContext.init(null, xtmArray, new java.security.SecureRandom());
    } catch(GeneralSecurityException gse) {
        // Do not hide the problem, and do not use a context whose init() failed:
        // getSocketFactory() on an uninitialised context would throw.
        gse.printStackTrace();
        sslContext = null;
    }
    if(sslContext != null) {
        HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
    }
    HttpsURLConnection.setDefaultHostnameVerifier(hnv);
}
// Trust manager whose check methods are empty, i.e. every client and server
// certificate chain is accepted.
// NOTE(review): this disables certificate validation for all HTTPS traffic once
// installed as the default; flagged here, behaviour deliberately left unchanged.
private X509TrustManager xtm = new X509TrustManager() {
    public void checkClientTrusted(X509Certificate[] chain, String authType) {}
    public void checkServerTrusted(X509Certificate[] chain, String authType) {}
    public X509Certificate[] getAcceptedIssuers() {
        // The interface contract requires a non-null (possibly empty) array.
        return new X509Certificate[0];
    }
};
// Hostname verifier that approves every host name without inspecting the session.
private HostnameVerifier hnv = new HostnameVerifier() {
    public boolean verify(String host, SSLSession tlsSession) {
        return true;
    }
};
/**
 * Downloads the given URL line by line (each line re-terminated with CRLF)
 * and passes the text through moin2wm.
 *
 * @param urladdress address to fetch; also forwarded to moin2wm
 * @param dict       page name forwarded to moin2wm
 * @return the result of moin2wm for the downloaded text
 * @throws IOException if the connection cannot be opened or read
 */
public String getUrl(String urladdress,String dict) throws IOException{
    URL url=new URL(urladdress);
    HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
    httpConn.setReadTimeout(60000);
    httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)");
    httpConn.setRequestProperty("Content-Language","UTF-8" );
    httpConn.setRequestProperty("Connection", "Keep-Alive");

    StringBuilder sb=new StringBuilder();
    // NOTE(review): the reader uses the platform default charset; confirm the pages' encoding before pinning one.
    BufferedReader read=new BufferedReader(new InputStreamReader(httpConn.getInputStream()));
    try{
        String line;
        while ((line=read.readLine())!=null){
            sb.append(line).append("\r\n");
        }
    }finally{
        // Always release the response stream, also when reading fails midway.
        read.close();
    }
    return moin2wm(sb.toString(),urladdress,dict);
}
public String moin2wm(String html,String url,String dict) throws UnsupportedEncodingException{
String text=html;
String ex="UbuntuHelp";
String turl="https://help.ubuntu.com/community/";
if (url.startsWith("https://wiki")){
ex="UbuntuWiki";
turl="https://wiki.ubuntu.com/";
}
String head="{{From|"+url.substring(0,url.indexOf("?"))+"}}\r\n{{Languages|"+ex+":"+dict+"}}\r\n";
//删除注释
text=text.replaceAll("\r\n##(.*)","");
text=text.replaceAll("\r\n#format(.*)","");
text=text.replaceAll("\r\n#language(.*)","");
text=text.replaceAll("\r\n#pragma(.*)","");
text=text.replaceAll("\r\n#acl(.*)","");
text=text.replaceAll("^##(.*)\r\n","");
text=text.replaceAll("^#format(.*)\r\n","");
text=text.replaceAll("^#language(.*)\r\n","");
text=text.replaceAll("^#pragma(.*)\r\n","");
text=text.replaceAll("^#acl(.*)\r\n","");
//替换#REDIRECT PDFPrinting => #REDIRECT [[PDFPrinting]]
text=text.replaceAll("#REDIRECT (\\S*)","#REDIRECT "+"[["+ex+":$1]]");
text=text.replaceAll("#redirect (\\S*)","#REDIRECT "+"[["+ex+":$1]]");
//#refresh 0 https://wiki.ubuntu.com/ASUS_A3H_5010_Laptop_with_Ubuntu
text=text.replaceAll("#REFRESH (.*?) (\\S*)","#REDIRECT "+"[["+ex+":$2]]");
text=text.replaceAll("#refresh (.*?) (\\S*)","#REDIRECT "+"[["+ex+":$2]]");
//删除主题
text=text.replaceAll(".*TableOfContents.*","");
//标题从二开始
text=text.replaceAll("= (.*?) =","== $1 ==");
//转化List
text=replaceList(text);
//[[BR]] -> <BR>
text=text.replaceAll("\\[\\[BR\\]\\]","<br>");
//link convert superscripted - ^ * ^ -> <sup> * </sup>
text=text.replaceAll("\\^(.*)\\^","<sup>$1</sup>");
//link convert subscripted - ,, * ,, -> <sub> * </sub>
text=text.replaceAll(",,(.*?),,","<sub>$1</sub>");
//link convert - [" * "] -> [[ UbuntuHelp: * ]]
text=text.replaceAll("\\[\"(.*?)\"\\]","[["+ex+":$1]]");
//link convert - [# * ] -> [[ * ]]
text=text.replaceAll("\\[#(.*?)\\]","[[$1]]");
//link convert - [: / * : * ] -> [[ UbuntuHelp:dict\ * | * ]]
text=text.replaceAll("\\[:/(.*?):(.*?)\\]","[["+ex+":"+dict+"/$1|$2]]");
//link convert - [: * : * ] -> [[ UbuntuHelp: * | * ]]
text=text.replaceAll("\\[:(.*?):(.*?)\\]","[["+ex+":$1|$2]]");
//link convert - [: / * ] -> [[ UbuntuHelp: dict * ]]
text=text.replaceAll("\\[:/(.*?)\\]","[["+ex+":"+dict+"/$1]]");
//link convert - [: * ] -> [[ UbuntuHelp: * ]]
text=text.replaceAll("\\[:(.*?)\\]","[["+ex+":$1]]");
//link convert - wiki:cat -> [[UbuntuWiki:cat]]
text=text.replaceAll(" wiki:(\\S*)"," [[UbuntuWiki:$1]]");
text=text.replaceAll("\r\nwiki:(\\S*)","\r\n[[UbuntuWiki:$1]]");
//link convert - [wiki:cat * ] -> [[UbuntuWiki:cat| * ]]
text=text.replaceAll("\\[wiki:(.*?)\\ (.*?)\\]","[[UbuntuWiki:$1|$2]]");
//link convert - [wiki:cat * ] -> [[UbuntuWiki:cat| * ]]
text=text.replaceAll("\\[wiki:(.*?)\\]","[[UbuntuWiki:$1]]");
//link convert - [UbuntuWiki:\*] -> [UbuntuWiki:dict\*]
text=text.replaceAll("\\[UbuntuWiki:\\\\(.*?)\\]","[UbuntuWiki:"+dict+"\\$1]");
//link convert - [UbuntuHelp:\*] -> [UbuntuHelp:dict\*]
text=text.replaceAll("\\[UbuntuHelp:\\\\(.*?)\\]","[UbuntuHelp:"+dict+"\\$1]");
//link convert - __ * __ -> <u> * </u>
text=text.replaceAll("__(.*?)__","<u>$1</u>");
//link convert - {{{ * }}} -> <code><nowiki> *
text=text.replaceAll("\\{\\{\\{(.*?)\\}\\}\\}","$1
");
//link convert - \r\n {{{ * -> \r\n
* text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","\r\n<pre><nowiki>$2"); //link convert - {{{ * -> <pre><nowiki> * text=text.replaceAll("\\{\\{\\{(.*)","\r\n<pre><nowiki>$1"); //link convert - * }}} -> * <\pre><\nowiki> text=text.replaceAll("(.*?)\\}\\}\\}","$1
");
//CategoryHomepage =>; text=text.replaceAll("Category(\\S*)",""); text=text.replaceAll("\r\n( *)","\r\n"); text=replaceUrl(text,turl,dict); text=tableConv(text); //xxx:http => http: text=text.replaceAll("\\[\\[(.*?):http(.*?)\\]\\]","http$2"); //xxx:ftp => ftp: text=text.replaceAll("\\[\\[(.*?):ftp(.*?)\\]\\]","ftp$2"); //[[1]] => UbuntuWiki: text=text.replaceAll("\\[\\[2]\\]","UbuntuWiki:$1"); //[[3]] => UbuntuHelp: text=text.replaceAll("\\[\\[4]\\]","UbuntuHelp:$1"); //[[5]] => UbuntuWiki: text=text.replaceAll("\\[\\[6]\\]","UbuntuWiki:$1"); //[[7]] => UbuntuHelp: text=text.replaceAll("\\[\\[8]\\]","UbuntuHelp:$1"); String foot="\r\n"; if (text.trim().startsWith("#REDIRECT")){ System.out.print(dict + " is redirect :"+text.trim()); return text+head+foot; } if (text.trim().length()<10){ return ""; } return head+text+foot; } public String replaceList(String text){ text=text.replaceAll("\r\n \\. (.*?)","\r\n* $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n*** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n**** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n***** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n****** $1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n*$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n**$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n***$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n****$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n*****$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n******$1");
// text=text.replaceAll("\r\n \\. (.*?)","\r\n#: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n##: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n###: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n####: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n#####: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n######: $1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n#$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n##$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n###$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n####$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n#####$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n######$1");
for (int i=1;i<20;i++){
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n#$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n##$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n###$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n####$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n#####$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n######$1");
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n*$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n**$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n***$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n****$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n*****$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n******$1"); } return text; } public String replaceUrl(String text,String baseurl,String dict) throws UnsupportedEncodingException{ //attachment:IconsPage/info.png -> while (true){ Pattern pattern= Pattern.compile("attachment:(.*?)/(\\S*)"); Matcher matcher=pattern.matcher(text); String replace; if (matcher.find()) { replace=baseurl+matcher.group(1)+"?action=AttachFile&do=get&target="+URLEncoder.encode(matcher.group(2), "UTF-8"); text=text.substring(0,matcher.start(0))+replace+text.substring(matcher.end(0)); continue; } pattern= Pattern.compile("attachment:(\\S*)"); matcher=pattern.matcher(text); if(matcher.find()) { replace=baseurl+dict+"?action=AttachFile&do=get&target="+URLEncoder.encode(matcher.group(1), "UTF-8"); text=text.substring(0,matcher.start(0))+replace+text.substring(matcher.end(0)); continue; } break; } return text; } public String tableConv(String html){ //||a||b||c|| -> {| //||d||e||f|| |a||b||c // |- // |d||e||f // |} String[] lines=html.split("\r\n"); String block = ""; StringBuffer bf= new StringBuffer(); boolean start=false; for (int i=0;i<lines.length;i++){ String line=lines[i].trim(); if (line.startsWith("||")){ if (! 
start){ start=true; String str=line.substring(1,line.length()-2); str=str.replaceAll("<bgcolor.*?>",""); str=str.replaceAll("<style.*?>",""); str=str.replaceAll("<rowbgcolor.*?>",""); block="{|border=\"1\" cellspacing=\"0\"\r\n"+str; }else{ String str=line.substring(1,line.length()-2); str=str.replaceAll("<bgcolor.*?>",""); str=str.replaceAll("<style.*?>",""); str=str.replaceAll("<rowbgcolor.*?>",""); block=block+"\r\n|-\r\n"+str; } }else{ if (start){ block=block+"\r\n|}\r\n"; bf.append(block); start=false; } bf.append(lines[i]+"\r\n"); } } if (start){ block=block+"\r\n|}\r\n"; bf.append(block); start=false; } return bf.toString(); } public void putText(String dict,String html,String surl) throws MalformedURLException, IOException{ URL url; URLConnection conn; InputStream in; BufferedReader read; StringBuffer sb; if (html.length()<5){ System.out.print(" is short:"+html); return; }
// if (html.length()<300){ // if (html.toUpperCase().trim().indexOf("REFRESH")>0) { // System.out.println(dict+" is REFRESH."); // return; // } // if (html.toUpperCase().trim().indexOf("REDIRECT")>0) { // System.out.println(dict+" is REDIRECT."); // return; // } // }
String ex="UbuntuHelp"; if (surl.startsWith("https://wiki")){ ex="UbuntuWiki"; } try{ url=new URL("http://wiki.ubuntu.org.cn/"+ex+":"+dict+"?action=raw"); conn = url.openConnection(); conn.setReadTimeout(60000); conn.setRequestProperty("Cookie", cookie); in=conn.getInputStream(); read=new BufferedReader(new InputStreamReader(in)); sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } if (sb.toString().trim().length()==html.trim().length()) { System.out.print(" no changes"); return; } }catch(Exception ex0){ //nothing } url=new URL("http://wiki.ubuntu.org.cn/"+ex+":"+dict+"?action=edit"); conn = url.openConnection(); conn.setReadTimeout(60000); conn.setRequestProperty("Connection", "Keep-Alive"); conn.setRequestProperty("Cookie", cookie); conn.setRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)"); conn.setRequestProperty("Accept","text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"); in=conn.getInputStream(); read=new BufferedReader(new InputStreamReader(in)); sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } int start=sb.indexOf("<form id=\"editform\"");
int end=sb.indexOf("