查看“用户:Wikibot”的源代码
来自Ubuntu中文
←
用户:Wikibot
跳到导航
跳到搜索
因为以下原因,您没有权限编辑该页面:
您请求的操作仅限属于该用户组的用户执行:
用户
您可以查看和复制此页面的源代码。
机器人,自动将 http://help.ubuntu.com 和 http://wiki.ubuntu.com 由 MoinMoin 格式转换到 mediawiki 格式,并自动更新和发布的小程序。 由java写成。
<source lang="JAVA">
/*
 * Main.java
 *
 * Created on 2007-05-12, 1:31 PM
 */

package wiki;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.security.GeneralSecurityException;
import java.security.Security;
import java.security.cert.X509Certificate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.X509TrustManager;

/**
 * Crawls the MoinMoin wikis at help.ubuntu.com/community and wiki.ubuntu.com,
 * converts every page from MoinMoin markup to MediaWiki markup and uploads the
 * result to wiki.ubuntu.org.cn under the "UbuntuHelp:" / "UbuntuWiki:" prefixes.
 *
 * <p>Not thread-safe: the crawl queue is plain instance state.
 *
 * <p>NOTE(review): the bot account's credentials are embedded in
 * {@link #getCookie()} (the password is masked in the published source).
 *
 * @author oneleaf
 */
public class Main {

    /** Charset used for every byte/char conversion (pages, cache files, form bodies). */
    private static final String CHARSET = "UTF-8";
    /** Wiki that receives the converted pages. */
    private static final String TARGET_WIKI = "http://wiki.ubuntu.org.cn";
    private static final String HELP_BASE = "https://help.ubuntu.com/community/";
    private static final String WIKI_BASE = "https://wiki.ubuntu.com/";
    private static final int READ_TIMEOUT_MS = 60000;
    /** Deepest list nesting level that {@link #replaceList(String)} converts. */
    private static final int MAX_LIST_DEPTH = 6;

    private static final String LEGACY_USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)";
    private static final String BROWSER_USER_AGENT =
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)";
    private static final String BROWSER_ACCEPT =
            "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";

    private static final Pattern HELP_LINK = Pattern.compile("\\[UbuntuHelp:(.*?)\\]");
    private static final Pattern ATTACHMENT_ON_PAGE = Pattern.compile("attachment:([^\\s/]*)/(\\S*)");
    private static final Pattern ATTACHMENT_LOCAL = Pattern.compile("attachment:(\\S*)");
    private static final Pattern INPUT_TAG = Pattern.compile("<input(.*?)/>");
    private static final Pattern NAME_ATTR = Pattern.compile("name=[\"'](.*?)[\"']");
    private static final Pattern VALUE_ATTR = Pattern.compile("value=[\"'](.*?)[\"']");

    /** Page names still waiting to be converted, in crawl order. */
    List<String> addDict = new ArrayList<String>();
    /** Page names already processed (or given up on). */
    List<String> oldDict = new ArrayList<String>();
    /** Value of the Cookie header sent to the target wiki; obtained by logging in. */
    String cookie = getCookie();

    /**
     * Logs the bot in on the target wiki and returns the session cookies.
     *
     * <p>Only the {@code name=value} part of each Set-Cookie header is kept;
     * attributes such as {@code path} or {@code expires} do not belong in a
     * Cookie request header. A cookie set twice keeps its latest value.
     *
     * @return the Cookie header value; whatever was collected (possibly the
     *         empty string) when the login fails, never {@code null}
     */
    private String getCookie() {
        Map<String, String> jar = new LinkedHashMap<String, String>();
        try {
            // Step 1: load the login form so the server hands out a session cookie.
            HttpURLConnection form = (HttpURLConnection)
                    new URL(TARGET_WIKI + "/index.php?title=Special:Userlogin").openConnection();
            form.setRequestProperty("User-Agent", LEGACY_USER_AGENT);
            form.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            form.setRequestProperty("Connection", "Keep-Alive");
            form.setUseCaches(false);
            collectCookies(jar, form);

            // Step 2: post the credentials together with that session cookie.
            byte[] data = "wpName=wikibot&wpPassword=********&wpRemember=1".getBytes(CHARSET);
            HttpURLConnection login = (HttpURLConnection) new URL(TARGET_WIKI
                    + "/index.php?title=Special:Userlogin&action=submitlogin&type=login").openConnection();
            login.setRequestMethod("POST");
            login.addRequestProperty("Cookie", cookieHeader(jar));
            login.setRequestProperty("User-Agent", LEGACY_USER_AGENT);
            login.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            login.setRequestProperty("Content-Language", "UTF-8");
            login.setRequestProperty("Content-Length", String.valueOf(data.length));
            login.setRequestProperty("Connection", "Keep-Alive");
            login.setDoOutput(true);
            login.setDoInput(true);
            login.setUseCaches(false);
            OutputStream body = login.getOutputStream();
            try {
                body.write(data);
                body.flush();
            } finally {
                body.close();
            }
            collectCookies(jar, login);
        } catch (Exception ex) {
            // Best effort: without a session putText() reports "please set cookie!".
            ex.printStackTrace();
        }
        return cookieHeader(jar);
    }

    /** Stores the name=value pair of every Set-Cookie response header of {@code conn} in {@code jar}. */
    private static void collectCookies(Map<String, String> jar, URLConnection conn) {
        for (Map.Entry<String, List<String>> header : conn.getHeaderFields().entrySet()) {
            // The status line is reported under a null key; header names are case-insensitive.
            if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
                continue;
            }
            for (String value : header.getValue()) {
                int semicolon = value.indexOf(';');
                String pair = (semicolon >= 0 ? value.substring(0, semicolon) : value).trim();
                int equals = pair.indexOf('=');
                if (equals > 0) {
                    jar.put(pair.substring(0, equals), pair.substring(equals + 1));
                }
            }
        }
    }

    /** Renders {@code jar} as a Cookie request header value ("a=1; b=2"). */
    private static String cookieHeader(Map<String, String> jar) {
        StringBuilder header = new StringBuilder();
        for (Map.Entry<String, String> entry : jar.entrySet()) {
            if (header.length() > 0) {
                header.append("; ");
            }
            header.append(entry.getKey()).append('=').append(entry.getValue());
        }
        return header.toString();
    }

    /**
     * Queues a page name for conversion unless it is empty, too long, a "team"
     * page, or already queued or processed.
     *
     * <p>The name is normalised first: surrounding whitespace, a "#fragment",
     * a "?query" and one leading "../", "./" or "/" are removed, in that order.
     *
     * @param dict raw link target or page name
     */
    private void addDict(String dict) {
        String str = dict.trim();
        int cut = str.indexOf("#");
        if (cut >= 0) {
            str = str.substring(0, cut);
        }
        cut = str.indexOf("?");
        if (cut >= 0) {
            str = str.substring(0, cut);
        }
        if (str.startsWith("../")) {        // e.g. ../CommandLine
            str = str.substring(3);
        } else if (str.startsWith("./")) {
            str = str.substring(2);
        } else if (str.startsWith("/")) {
            str = str.substring(1);
        }
        str = str.trim();

        // NOTE(review): "> 0" keeps names that *start* with "team"; confirm that is intended.
        if (str.toLowerCase().indexOf("team") > 0) {
            return;
        }
        if (str.length() == 0 || str.length() >= 256) {
            return;
        }
        if (oldDict.contains(str) || addDict.contains(str)) {
            return;
        }
        addDict.add(str);
    }

    /** Moves the queued page at {@code dictindex} to the processed list. */
    private void delDict(int dictindex) {
        oldDict.add(addDict.remove(dictindex));
    }

    /** Empties both the queue and the processed list. */
    private void clearDict() {
        addDict.clear();
        oldDict.clear();
    }

    /**
     * Queues every page referenced by a {@code [UbuntuHelp:...]} link in
     * converted text; a "|label" suffix inside the link is ignored.
     */
    private void getDicts(String html) {
        Matcher matcher = HELP_LINK.matcher(html);
        while (matcher.find()) {
            String target = matcher.group(1);
            int bar = target.indexOf("|");
            addDict(bar > 0 ? target.substring(0, bar) : target);
        }
    }

    /**
     * Creates a new instance of Main and installs the JVM-wide HTTPS defaults.
     *
     * <p>SECURITY NOTE(review): the trust manager and hostname verifier below
     * accept every certificate and every host name, for all HTTPS connections
     * of the JVM. The behaviour is kept because the crawler was written that
     * way, but it removes protection against man-in-the-middle attacks and
     * should be replaced by a proper trust store.
     */
    public Main() {
        try {
            SSLContext sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, new X509TrustManager[] { xtm }, new java.security.SecureRandom());
            HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory());
        } catch (GeneralSecurityException gse) {
            // Keep going with the JVM default socket factory, but say so.
            System.err.println("TLS context setup failed, using JVM defaults: " + gse);
        }
        HttpsURLConnection.setDefaultHostnameVerifier(hnv);
    }

    /** Trust manager that accepts every certificate chain (see the constructor's security note). */
    private X509TrustManager xtm = new X509TrustManager() {
        public void checkClientTrusted(X509Certificate[] chain, String authType) {
        }

        public void checkServerTrusted(X509Certificate[] chain, String authType) {
        }

        public X509Certificate[] getAcceptedIssuers() {
            // The interface contract asks for a non-null, possibly empty array.
            return new X509Certificate[0];
        }
    };

    /** Hostname verifier that accepts every host (see the constructor's security note). */
    private HostnameVerifier hnv = new HostnameVerifier() {
        public boolean verify(String hostname, SSLSession session) {
            return true;
        }
    };

    /** Reads {@code in} to the end as UTF-8 text, terminating every line with CRLF, and closes it. */
    private static String readAll(InputStream in) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, CHARSET));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\r\n");
            }
            return text.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Downloads the raw MoinMoin source of a page and converts it.
     *
     * @param urladdress URL of the raw page (normally ending in "?action=raw")
     * @param dict       page name, used for relative links and the page header
     * @return the MediaWiki text produced by {@link #moin2wm}
     * @throws IOException if the page cannot be downloaded
     */
    public String getUrl(String urladdress, String dict) throws IOException {
        HttpURLConnection httpConn = (HttpURLConnection) new URL(urladdress).openConnection();
        httpConn.setReadTimeout(READ_TIMEOUT_MS);
        httpConn.setRequestProperty("User-Agent", LEGACY_USER_AGENT);
        httpConn.setRequestProperty("Content-Language", "UTF-8");
        httpConn.setRequestProperty("Connection", "Keep-Alive");
        return moin2wm(readAll(httpConn.getInputStream()), urladdress, dict);
    }

    /**
     * Converts MoinMoin markup to MediaWiki markup. The replacement passes are
     * order dependent and run exactly in the order written.
     *
     * @param html raw MoinMoin text with CRLF line ends
     * @param url  source URL; one starting with "https://wiki" selects the
     *             "UbuntuWiki" prefix, anything else "UbuntuHelp"
     * @param dict name of the page being converted
     * @return the converted page wrapped in the From/Languages header and the
     *         category footer; for a redirect the header and footer follow the
     *         text; the empty string when fewer than 10 characters remain
     */
    public String moin2wm(String html, String url, String dict) throws UnsupportedEncodingException {
        String text = html;
        String ex = "UbuntuHelp";
        String turl = HELP_BASE;
        if (url.startsWith("https://wiki")) {
            ex = "UbuntuWiki";
            turl = WIKI_BASE;
        }
        int query = url.indexOf("?");
        String pageUrl = query >= 0 ? url.substring(0, query) : url;
        String head = "{{From|" + pageUrl + "}}\r\n{{Languages|" + ex + ":" + dict + "}}\r\n";
        // dict ends up inside regex replacement strings; protect any '$' or '\' it contains.
        String qdict = Matcher.quoteReplacement(dict);

        // Remove comments and processing instructions.
        String[] directives = { "##", "#format", "#language", "#pragma", "#acl" };
        for (String directive : directives) {
            text = text.replaceAll("\r\n" + directive + "(.*)", "");
        }
        for (String directive : directives) {
            // '^' is not multiline here: this only handles the very first line.
            text = text.replaceAll("^" + directive + "(.*)\r\n", "");
        }

        // #REDIRECT PDFPrinting => #REDIRECT [[PDFPrinting]]
        text = text.replaceAll("#REDIRECT (\\S*)", "#REDIRECT [[" + ex + ":$1]]");
        text = text.replaceAll("#redirect (\\S*)", "#REDIRECT [[" + ex + ":$1]]");
        // #refresh 0 https://wiki.ubuntu.com/ASUS_A3H_5010_Laptop_with_Ubuntu
        text = text.replaceAll("#REFRESH (.*?) (\\S*)", "#REDIRECT [[" + ex + ":$2]]");
        text = text.replaceAll("#refresh (.*?) (\\S*)", "#REDIRECT [[" + ex + ":$2]]");
        // Drop the table-of-contents macro.
        text = text.replaceAll(".*TableOfContents.*", "");
        // Headings start at level two.
        text = text.replaceAll("= (.*?) =", "== $1 ==");
        // Convert lists.
        text = replaceList(text);
        // [[BR]] -> <br>
        text = text.replaceAll("\\[\\[BR\\]\\]", "<br>");
        // ^ * ^ -> <sup> * </sup>  (reluctant, so two superscripts on one line stay separate)
        text = text.replaceAll("\\^(.*?)\\^", "<sup>$1</sup>");
        // ,, * ,, -> <sub> * </sub>
        text = text.replaceAll(",,(.*?),,", "<sub>$1</sub>");
        // [" * "] -> [[UbuntuHelp: * ]]
        text = text.replaceAll("\\[\"(.*?)\"\\]", "[[" + ex + ":$1]]");
        // [# * ] -> [[ * ]]
        text = text.replaceAll("\\[#(.*?)\\]", "[[$1]]");
        // [:/ * : * ] -> [[UbuntuHelp:dict/ * | * ]]
        text = text.replaceAll("\\[:/(.*?):(.*?)\\]", "[[" + ex + ":" + qdict + "/$1|$2]]");
        // [: * : * ] -> [[UbuntuHelp: * | * ]]
        text = text.replaceAll("\\[:(.*?):(.*?)\\]", "[[" + ex + ":$1|$2]]");
        // [:/ * ] -> [[UbuntuHelp:dict/ * ]]
        text = text.replaceAll("\\[:/(.*?)\\]", "[[" + ex + ":" + qdict + "/$1]]");
        // [: * ] -> [[UbuntuHelp: * ]]
        text = text.replaceAll("\\[:(.*?)\\]", "[[" + ex + ":$1]]");
        // wiki:cat -> [[UbuntuWiki:cat]]
        text = text.replaceAll(" wiki:(\\S*)", " [[UbuntuWiki:$1]]");
        text = text.replaceAll("\r\nwiki:(\\S*)", "\r\n[[UbuntuWiki:$1]]");
        // [wiki:cat * ] -> [[UbuntuWiki:cat| * ]]
        text = text.replaceAll("\\[wiki:(.*?)\\ (.*?)\\]", "[[UbuntuWiki:$1|$2]]");
        // [wiki:cat] -> [[UbuntuWiki:cat]]
        text = text.replaceAll("\\[wiki:(.*?)\\]", "[[UbuntuWiki:$1]]");
        // [UbuntuWiki:\*] -> [UbuntuWiki:dict\*]  ("\\\\" emits one literal backslash before group 1)
        text = text.replaceAll("\\[UbuntuWiki:\\\\(.*?)\\]", "[UbuntuWiki:" + qdict + "\\\\$1]");
        // [UbuntuHelp:\*] -> [UbuntuHelp:dict\*]
        text = text.replaceAll("\\[UbuntuHelp:\\\\(.*?)\\]", "[UbuntuHelp:" + qdict + "\\\\$1]");
        // __ * __ -> <u> * </u>
        text = text.replaceAll("__(.*?)__", "<u>$1</u>");
        // {{{ * }}} on one line -> <code><nowiki> * </nowiki></code>
        text = text.replaceAll("\\{\\{\\{(.*?)\\}\\}\\}", "<code><nowiki>$1</nowiki></code>");
        // line-leading {{{ * -> <pre><nowiki> *
        text = text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)", "\r\n<pre><nowiki>$2");
        // any other {{{ * -> new line, <pre><nowiki> *
        text = text.replaceAll("\\{\\{\\{(.*)", "\r\n<pre><nowiki>$1");
        // * }}} -> * </nowiki></pre>
        text = text.replaceAll("(.*?)\\}\\}\\}", "$1</nowiki></pre>");
        // CategoryHomepage -> [[category:CategoryHomepage]]
        text = text.replaceAll("Category(\\S*)", "[[category:Category$1]]");
        // Strip leading indentation.
        text = text.replaceAll("\r\n( *)", "\r\n");

        text = replaceUrl(text, turl, dict);
        text = tableConv(text);

        // [[xxx:http...]] -> [[http...]]; the prefix may not contain brackets, so an
        // earlier link on the same line is never swallowed.
        text = text.replaceAll("\\[\\[([^\\[\\]]*?):http(.*?)\\]\\]", "[[http$2]]");
        // [[xxx:ftp...]] -> [[ftp...]]
        text = text.replaceAll("\\[\\[([^\\[\\]]*?):ftp(.*?)\\]\\]", "[[ftp$2]]");
        // Absolute links back into the source wikis become interwiki-style links.
        text = text.replaceAll("\\[\\[https://wiki\\.ubuntu\\.com/(.*?)\\]\\]", "[[UbuntuWiki:$1]]");
        text = text.replaceAll("\\[\\[https://help\\.ubuntu\\.com/community/(.*?)\\]\\]", "[[UbuntuHelp:$1]]");
        text = text.replaceAll("\\[\\[http://wiki\\.ubuntu\\.com/(.*?)\\]\\]", "[[UbuntuWiki:$1]]");
        text = text.replaceAll("\\[\\[http://help\\.ubuntu\\.com/community/(.*?)\\]\\]", "[[UbuntuHelp:$1]]");

        String foot = "\r\n[[category:" + ex + "]]";
        if (text.trim().startsWith("#REDIRECT")) {
            System.out.print(dict + " is redirect :" + text.trim());
            // The redirect directive has to stay on the first line.
            return text + head + foot;
        }
        if (text.trim().length() < 10) {
            return "";
        }
        return head + text + foot;
    }

    /**
     * Converts MoinMoin list items at the start of a line into MediaWiki bullets.
     *
     * <p>An item indented by n spaces (1 to {@value #MAX_LIST_DEPTH}) becomes a
     * bullet of n stars. Three item styles are handled: " . text", " * text"
     * and numbered " 1. text" up to 19 (numbered lists deliberately become
     * bullets as well).
     *
     * <p>NOTE(review): the published source shows the same one-space pattern for
     * all six depths, which leaves depths 2-6 unconverted; depth = number of
     * leading spaces is inferred from the replacement strings - confirm against
     * the author's copy.
     *
     * @param text MoinMoin text with CRLF line ends
     * @return the text with list prefixes rewritten
     */
    public String replaceList(String text) {
        StringBuilder indent = new StringBuilder("\r\n");
        StringBuilder bullet = new StringBuilder("\r\n");
        for (int depth = 1; depth <= MAX_LIST_DEPTH; depth++) {
            indent.append(' ');
            bullet.append('*');
            text = text.replaceAll(indent + "\\. ", bullet + " ");
            text = text.replaceAll(indent + "\\*", bullet.toString());
            text = text.replaceAll(indent + "(?:1[0-9]|[1-9])\\.", bullet.toString());
        }
        return text;
    }

    /**
     * Rewrites MoinMoin attachment references into download URLs.
     *
     * <p>{@code attachment:IconsPage/info.png} becomes
     * {@code <baseurl>IconsPage?action=AttachFile&do=get&target=info.png};
     * {@code attachment:info.png} refers to the current page {@code dict}.
     * File names are URL-encoded as UTF-8.
     *
     * @param text    text to rewrite
     * @param baseurl base URL of the source wiki, ending in "/"
     * @param dict    name of the page being converted
     * @return the text with every attachment reference replaced
     */
    public String replaceUrl(String text, String baseurl, String dict) throws UnsupportedEncodingException {
        // Page-qualified references first, so the catch-all pattern only sees local ones.
        String rewritten = rewriteAttachments(text, ATTACHMENT_ON_PAGE, baseurl, null);
        return rewriteAttachments(rewritten, ATTACHMENT_LOCAL, baseurl, dict);
    }

    /**
     * Replaces every match of {@code pattern} by an AttachFile URL. The last
     * group is the file name; the owning page is {@code page}, or group 1 when
     * {@code page} is null.
     */
    private static String rewriteAttachments(String text, Pattern pattern, String baseurl, String page)
            throws UnsupportedEncodingException {
        Matcher matcher = pattern.matcher(text);
        StringBuffer out = new StringBuffer();
        while (matcher.find()) {
            String owner = page != null ? page : matcher.group(1);
            String file = matcher.group(matcher.groupCount());
            String link = baseurl + owner + "?action=AttachFile&do=get&target="
                    + URLEncoder.encode(file, CHARSET);
            matcher.appendReplacement(out, Matcher.quoteReplacement(link));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Converts runs of MoinMoin table rows into MediaWiki tables.
     *
     * <pre>
     * ||a||b||c||      {|border="1" cellspacing="0"
     * ||d||e||f||  ->  |a||b||c
     *                  |-
     *                  |d||e||f
     *                  |}
     * </pre>
     *
     * @param html text with CRLF line ends
     * @return the converted text; every line of the result ends with CRLF
     */
    public String tableConv(String html) {
        StringBuilder out = new StringBuilder();
        StringBuilder table = null; // non-null while inside a run of table rows
        for (String raw : html.split("\r\n")) {
            String line = raw.trim();
            if (line.startsWith("||")) {
                if (table == null) {
                    table = new StringBuilder("{|border=\"1\" cellspacing=\"0\"\r\n");
                } else {
                    table.append("\r\n|-\r\n");
                }
                table.append(tableRow(line));
                continue;
            }
            if (table != null) {
                out.append(table).append("\r\n|}\r\n");
                table = null;
            }
            out.append(raw).append("\r\n");
        }
        if (table != null) {
            out.append(table).append("\r\n|}\r\n");
        }
        return out.toString();
    }

    /**
     * Turns one "||a||b||" row into "|a||b": drops one leading bar, the closing
     * "||" when present, and MoinMoin colour/style attributes.
     */
    private static String tableRow(String line) {
        String row = line.substring(1);
        if (row.endsWith("||")) {
            row = row.substring(0, row.length() - 2);
        }
        row = row.replaceAll("<bgcolor.*?>", "");
        row = row.replaceAll("<style.*?>", "");
        row = row.replaceAll("<rowbgcolor.*?>", "");
        return row;
    }

    /**
     * Uploads converted text to the target wiki unless the page already holds it.
     *
     * @param dict page name without prefix
     * @param html converted MediaWiki text; fewer than 5 characters is skipped
     * @param surl source URL; one starting with "https://wiki" selects the
     *             "UbuntuWiki" prefix, anything else "UbuntuHelp"
     * @throws IOException if the edit form cannot be fetched or the upload
     *                     cannot be sent
     */
    public void putText(String dict, String html, String surl) throws MalformedURLException, IOException {
        if (html.length() < 5) {
            System.out.print(" is short:" + html);
            return;
        }
        String ex = surl.startsWith("https://wiki") ? "UbuntuWiki" : "UbuntuHelp";
        // One encoded address for raw, edit and submit, so names with spaces or
        // non-ASCII characters work in all three requests.
        String pageUrl = TARGET_WIKI + "/index.php?title=" + ex + ":" + URLEncoder.encode(dict, CHARSET);

        // Skip the upload when the stored text is already identical.
        try {
            URLConnection raw = new URL(pageUrl + "&action=raw").openConnection();
            raw.setReadTimeout(READ_TIMEOUT_MS);
            raw.setRequestProperty("Cookie", cookie);
            if (readAll(raw.getInputStream()).trim().equals(html.trim())) {
                System.out.print(" no changes");
                return;
            }
        } catch (IOException ignored) {
            // The page most likely does not exist yet; go on and create it.
        }

        // Fetch the edit form for its hidden fields (edit token, timestamps, ...).
        URLConnection edit = new URL(pageUrl + "&action=edit").openConnection();
        edit.setReadTimeout(READ_TIMEOUT_MS);
        edit.setRequestProperty("Connection", "Keep-Alive");
        edit.setRequestProperty("Cookie", cookie);
        edit.setRequestProperty("User-Agent", BROWSER_USER_AGENT);
        edit.setRequestProperty("Accept", BROWSER_ACCEPT);
        String page = readAll(edit.getInputStream());
        int start = page.indexOf("<form id=\"editform\"");
        int end = page.indexOf("<div class=\"printfooter\">");
        if (start < 0 || end < start) {
            // No edit form in the response: normally the login did not succeed.
            System.out.println(dict + " error,please set cookie!");
            System.out.println(page);
            return;
        }
        Map<String, String> fields = getPostData(page.substring(start, end));

        // Build the multipart body: all form fields except the alternative
        // buttons, with the textarea replaced by the converted text.
        String boundary = "---------------------------167593640336579986891120154";
        StringBuilder content = new StringBuilder();
        for (Map.Entry<String, String> field : fields.entrySet()) {
            String name = field.getKey();
            if (name.equals("wpPreview") || name.equals("wpDiff") || name.equals("wpWatchthis")) {
                continue;
            }
            String value = name.equals("wpTextbox1") ? html : field.getValue();
            content.append("--").append(boundary).append("\r\n");
            content.append("Content-Disposition: form-data; name=\"").append(name).append("\"\r\n\r\n");
            content.append(value).append("\r\n");
        }
        content.append("--").append(boundary).append("--\r\n\r\n");
        byte[] data = content.toString().getBytes(CHARSET);

        HttpURLConnection submit = (HttpURLConnection) new URL(pageUrl + "&action=submit").openConnection();
        submit.setReadTimeout(READ_TIMEOUT_MS);
        submit.setDoOutput(true);
        submit.setAllowUserInteraction(false);
        submit.setRequestMethod("POST");
        submit.setRequestProperty("Cookie", cookie);
        submit.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary);
        submit.setRequestProperty("User-Agent", BROWSER_USER_AGENT);
        submit.setRequestProperty("Accept", BROWSER_ACCEPT);
        submit.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5");
        submit.setRequestProperty("Referer", pageUrl + "&action=edit");
        submit.setRequestProperty("Accept-Charset", "UTF-8,*");
        submit.setRequestProperty("Connection", "Keep-Alive");
        submit.setRequestProperty("Content-Length", String.valueOf(data.length));
        OutputStream out = submit.getOutputStream();
        try {
            out.write(data);
            out.flush();
        } finally {
            out.close();
        }

        // Reading the first response line is enough to complete the request.
        try {
            BufferedReader response =
                    new BufferedReader(new InputStreamReader(submit.getInputStream(), CHARSET));
            try {
                response.readLine();
            } finally {
                response.close();
            }
        } catch (IOException ex0) {
            System.out.println(" add. but has a error:" + ex0.getMessage());
        }
    }

    /**
     * Extracts the {@code <input .../>} fields of the edit form.
     *
     * @param form HTML of the edit form
     * @return field name to value; a field without a value attribute maps to
     *         "0" when it is a checkbox and to "" otherwise; "wpTextbox1" is
     *         always present with an empty value
     */
    private Map<String, String> getPostData(String form) {
        Map<String, String> fields = new HashMap<String, String>();
        Matcher input = INPUT_TAG.matcher(form);
        while (input.find()) {
            String attributes = input.group(1);
            Matcher name = NAME_ATTR.matcher(attributes);
            if (!name.find()) {
                continue;
            }
            Matcher value = VALUE_ATTR.matcher(attributes);
            String fieldValue;
            if (value.find()) {
                fieldValue = value.group(1);
            } else {
                fieldValue = attributes.indexOf("checkbox") > 0 ? "0" : "";
            }
            fields.put(name.group(1), fieldValue);
        }
        // The textarea is not an <input>; putText() supplies its content.
        fields.put("wpTextbox1", "");
        return fields;
    }

    /**
     * Converts and uploads every page listed in the help.ubuntu.com/community
     * TitleIndex, plus pages discovered through UbuntuHelp links on the way.
     *
     * @throws IOException if the title index cannot be loaded or has an
     *                     unexpected layout
     */
    public void helpstart() throws IOException {
        crawl(HELP_BASE + "TitleIndex", "/tmp/helpindex.html",
                "<a name=\"3\">", "<a href=\"/community/(.*?)\">", HELP_BASE);
    }

    /**
     * Converts and uploads every page listed in the wiki.ubuntu.com TitleIndex.
     *
     * @throws IOException if the title index cannot be loaded or has an
     *                     unexpected layout
     */
    public void wikistart() throws MalformedURLException, IOException {
        crawl(WIKI_BASE + "TitleIndex", "/tmp/wikiindex.html",
                "<a name=\"0\">", "<a href=\"/(.*?)\">", WIKI_BASE);
    }

    /**
     * Shared crawl loop of {@link #helpstart()} and {@link #wikistart()}.
     *
     * @param indexUrl    URL of the TitleIndex page
     * @param cachePath   local file that caches the index between runs
     * @param startMarker anchor where the list of page links begins
     * @param linkRegex   regex whose group 1 is the page name of an index link
     * @param baseUrl     URL prefix of the pages, ending in "/"
     */
    private void crawl(String indexUrl, String cachePath, String startMarker, String linkRegex,
            String baseUrl) throws IOException {
        clearDict();
        String index = loadIndex(indexUrl, new File(cachePath));
        int from = index.indexOf(startMarker);
        int to = index.indexOf("<a name=\"%5b\">");
        if (from < 0 || to < from) {
            throw new IOException("Unexpected TitleIndex layout: " + indexUrl);
        }
        Matcher link = Pattern.compile(linkRegex).matcher(index.substring(from, to));
        while (link.find()) {
            String input = link.group(1);
            addDict(input);
            System.out.println(input);
        }

        System.out.println("一共需要转换 " + String.valueOf(addDict.size()) + " 篇文章。");
        while (addDict.size() > 0) {
            // Always take the head: getDicts() appends newly found pages at the tail.
            String dict = addDict.get(0);
            System.out.print(String.valueOf(addDict.size()) + " " + dict);
            try {
                String out = fetchWithRetry(baseUrl + dict + "?action=raw", dict);
                if (out == null) {
                    continue;
                }
                try {
                    System.out.print(" get dict");
                    getDicts(out);
                    System.out.print(" . put");
                    putText(dict, out, baseUrl + dict);
                    System.out.print(" .\r\n");
                } catch (Exception ex) {
                    // One broken page must not stop the whole run.
                    System.out.println(dict + " error:" + ex.getMessage());
                }
            } finally {
                delDict(0);
            }
        }
    }

    /** Returns the cached index page if the cache file exists; otherwise downloads and caches it. */
    private static String loadIndex(String indexUrl, File cache) throws IOException {
        if (cache.exists()) {
            return readAll(new FileInputStream(cache));
        }
        String page = readAll(new URL(indexUrl).openConnection().getInputStream());
        FileOutputStream out = new FileOutputStream(cache);
        try {
            out.write(page.getBytes(CHARSET));
            out.flush();
        } finally {
            out.close();
        }
        return page;
    }

    /**
     * Downloads and converts a page, trying a second time after a failure.
     *
     * @return the converted text, or {@code null} when both attempts failed
     */
    private String fetchWithRetry(String address, String dict) {
        try {
            System.out.print(" read");
            String out = getUrl(address, dict);
            System.out.print(" .");
            return out;
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        try {
            System.out.print(" read again");
            String out = getUrl(address, dict);
            System.out.print(" .");
            return out;
        } catch (Exception ex2) {
            ex2.printStackTrace();
            return null;
        }
    }

    /**
     * Scratch hook for manual experiments; the published version contained
     * only commented-out trials, so it currently does nothing.
     */
    public void test() throws IOException {
    }

    /**
     * Runs the wiki.ubuntu.com conversion; call {@code helpstart()} instead to
     * convert help.ubuntu.com/community.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String[] args) {
        Main main = new Main();
        try {
            main.wikistart();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
</source>
返回
用户:Wikibot
。
导航菜单
页面操作
用户页
讨论
阅读
查看源代码
历史
页面操作
用户页
讨论
更多
工具
个人工具
登录
导航
首页
最近更改
随机页面
页面分类
帮助
搜索
编辑
编辑指南
沙盒
新闻动态
字词处理
工具
链入页面
相关更改
用户贡献
日志
查看用户组
特殊页面
页面信息