| 注册
请输入搜索内容

热门搜索

Java Linux MySQL PHP JavaScript Hibernate jQuery Nginx
ph44
10年前发布

从源代码里提取中文字符串的 Java 类

工作中需要优化代码里的中文警示语和异常信息,数量实在比较多,所以就写了个程序,专门从代码里提取中文字符串。

import java.io.*;  import java.util.*;  import java.util.regex.Matcher;  import java.util.regex.Pattern;    /**   * 搜索字符串并输出到控制台   */  public class ExtractStr {      public static String getHelpString(String[] args){          String result = String.format("%s [path]", new Object[]{"a"});          return result;      }        private void getFiles(String rootPath, final String fileExt, List<File> fileList){          File f =new File(rootPath);          File[] list=f.listFiles(new FileFilter() {              @Override              public boolean accept(File f) {                  boolean ret = f.isDirectory() || (f.isFile() && f.getName().endsWith(fileExt));                  return ret;              }          });          for(File fn : list){              if (fn.isDirectory()){                  this.getFiles(fn.getAbsolutePath(), fileExt, fileList);              }else{                  fileList.add(fn);              }          }      }        public List<File> run(String rootPath, String fileExt){          List<File> result = new LinkedList<File>();          this.getFiles(rootPath, fileExt, result);          return result;      }        public List<String> parserSourceFile(List<String> patternList, File file, int miniCharCount) throws Exception {          List<String> result = new LinkedList<String>();          BufferedReader r = new BufferedReader(new FileReader(file));          char[] buffer = new char[(int)file.length()];          r.read(buffer, 0, (int)file.length());          String text=new String(buffer, 0, buffer.length);            for(String patternStr : patternList){              //Pattern pattern = Pattern.compile("\"(.*?)\"");              Pattern pattern = Pattern.compile(patternStr);              Matcher matchers= pattern.matcher(text);              while(matchers.find()){                  String t=matchers.group();                  if (t.length()>=miniCharCount)                      result.add(t);              }          }          return result;      
}        public static void main(String []args){          if (args.length==0){              StringBuilder sb = new StringBuilder()                      .append("未传入需要搜索的有效的源代码路径")                      .append("\n")                      .append(ExtractStr.getHelpString(args));              System.out.println(sb.toString());              System.exit(1);          }          List<String> searchFolders=new ArrayList<String>(100);          for(int i=0; i<=args.length-1;i++){              File f=new File(args[i]);              if (!f.isDirectory() || !f.exists())                  continue;              searchFolders.add(f.getAbsolutePath());          }            List<String> patternList = new LinkedList<String>();          patternList.add("'([\\u4E00-\\u9FA5]+)'");          patternList.add("\"([\\u4E00-\\u9FA5]+)\"");            ExtractStr es = new ExtractStr();          List<File> fileList = new ArrayList<File>(1000);          for(String sarchFolder : searchFolders){              List<File> t=es.run(sarchFolder, ".php");              fileList.addAll(t);          }            Set<String> outList=new HashSet<String>();          for(File f : fileList){              try{                  List<String> items=es.parserSourceFile(patternList, f, 12);                  outList.addAll(items);              }catch(Exception e){                  e.printStackTrace();              }          }          for(String str : outList){              System.out.println(str);          }      }  }

对于拼接字符串中的中文,支持得不算好;而且对中文字符串的长度也有限制(匹配结果连同两端的引号在内,最少 12 个字符)。