先把mobi拆成源代码

stardict字典格式:

单词\t后续的解释\\n后续的解释\n
即stardict字典的源文件格式比较简单:每个词条占一行,\t(制表符)分隔单词和解释,解释中的 \\n 会被stardict显示成换行,行尾的 \n 表示该单词的解释结束。

后续就简单了,提取mobi源代码中各个单词项,逐个转换。

比如:

import com.sun.xml.internal.ws.util.ByteArrayBuffer;

import java.io.*;

/**
 * Two-step converter from the HTML source of an unpacked Kindle (mobi) dictionary
 * to a StarDict tab-separated text file.
 *
 * <ol>
 *   <li>{@link #a()} scans the book HTML and writes every {@code <idx:entry>} element
 *       on its own line.</li>
 *   <li>{@link #main(String[])} reads that one-entry-per-line file and writes one StarDict
 *       line per entry: {@code headword TAB definition}, where line breaks inside the
 *       definition are the two characters backslash + {@code n}.</li>
 * </ol>
 *
 * All text is read and written as UTF-8. The file-level import of
 * {@code com.sun.xml.internal.ws.util.ByteArrayBuffer} is no longer referenced by this class.
 */
public class Test {

    /** Charset used for every byte/char conversion instead of the platform default. */
    private static final java.nio.charset.Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;

    /** Unpacked mobi book; default input of {@link #a()}. */
    private static final String BOOK_HTML = "C:\\Users\\tanmingxin\\Desktop\\11\\mobi7\\book1.html";

    /** One-entry-per-line HTML; default output of {@link #a()} and default input of main. */
    private static final String ENTRIES_HTML = "C:\\Users\\tanmingxin\\Desktop\\11\\mobi7\\wocao.html";

    /** StarDict tab file; default output of main. */
    private static final String STARDICT_TXT = "C:\\Users\\tanmingxin\\Desktop\\11\\mobi7\\wocao.txt";

    /** Marker that opens a dictionary entry in the book HTML. */
    private static final String ENTRY_OPEN = "<idx:entry";

    /** Marker that closes a dictionary entry in the book HTML. */
    private static final String ENTRY_CLOSE = "</idx:entry>";

    /**
     * Converts the one-entry-per-line HTML file into a StarDict tab file.
     *
     * @param args optional: {@code args[0]} input path, {@code args[1]} output path;
     *             missing values fall back to the original hard-coded paths
     * @throws Exception if a file cannot be opened, read or written
     */
    public static void main(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : ENTRIES_HTML;
        String outputPath = (args != null && args.length > 1) ? args[1] : STARDICT_TXT;
        // try-with-resources closes both files even when the conversion throws.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(inputPath), UTF8));
             PrintWriter writer = new PrintWriter(
                     new OutputStreamWriter(new FileOutputStream(outputPath), UTF8))) {
            convert(reader, writer);
        }
    }

    /**
     * Writes one StarDict line to {@code writer} for every line read from {@code reader}.
     *
     * @param reader source of entry HTML, one entry per line
     * @param writer destination of the StarDict lines
     * @throws IOException if reading fails or the writer reports an error
     */
    static void convert(BufferedReader reader, PrintWriter writer) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            writer.println(toStardictLine(line));
        }
        // PrintWriter never throws on write failures; surface them explicitly.
        if (writer.checkError()) {
            throw new IOException("failed to write StarDict output");
        }
    }

    /**
     * Turns the HTML of one entry into one StarDict line (without the line terminator).
     *
     * <p>The HTML is split at {@code <br/>}. The tag-stripped first piece is the headword and
     * is followed by a tab; the remaining pieces are joined with a literal backslash-n, with
     * nothing appended after the last piece.
     *
     * @param entryHtml HTML of a single entry
     * @return the StarDict line for that entry
     */
    static String toStardictLine(String entryHtml) {
        String[] pieces = entryHtml.split("<br/>");
        StringBuilder line = new StringBuilder(entryHtml.length());
        for (int i = 0; i < pieces.length; i++) {
            line.append(stripTags(pieces[i].trim()).trim());
            if (i == 0) {
                line.append('\t');
            } else if (i + 1 != pieces.length) {
                line.append("\\n");
            }
        }
        return line.toString();
    }

    /**
     * Removes everything between a {@code <} and the next {@code >}, both inclusive.
     * An unterminated tag swallows the rest of the input; a stray {@code >} outside a tag is kept.
     *
     * @param html text that may contain markup
     * @return the text with the markup removed
     */
    static String stripTags(String html) {
        StringBuilder text = new StringBuilder(html.length());
        boolean insideTag = false;
        for (int i = 0; i < html.length(); i++) {
            char c = html.charAt(i);
            if (insideTag) {
                if (c == '>') {
                    insideTag = false;
                }
            } else if (c == '<') {
                insideTag = true;
            } else {
                text.append(c);
            }
        }
        return text.toString();
    }

    /*
     * Shape of one entry in the book HTML (abbreviated; glosses and phonetics elided):
     *
     * <idx:entry scriptable="yes"><idx:orth value="Aa"></idx:orth><b><word>A, </word><word>a<br/></word></b>
     *  <phonetic>...</phonetic> <category><cat><b><i>n<br/></i></b></cat>
     *  <sense ><b>1.</b> <gram>[<b>C</b>]</gram> <description>...<br/></description> </sense>
     *  <sense ><b>4.</b> <gram>[<b>C</b>]</gram> <description>...</description>
     *   <example> <source>get an <i>A</i> on a test in math</source> <target>...<br/></target> </example> </sense>
     *  </category> <phrase> <word>from A to B<br/></word> <sense > <description>...</description> </sense> </phrase>
     * </idx:entry><hr/>
     */

    /**
     * Extracts every {@code <idx:entry>...</idx:entry>} element from the unpacked book HTML
     * and writes each one on its own line of the intermediate file read by main.
     *
     * @throws Exception if a file cannot be opened, read or written
     */
    public static void a() throws Exception {
        // Buffered input: the scanner below consumes the stream one byte at a time.
        try (InputStream book = new BufferedInputStream(new FileInputStream(BOOK_HTML));
             PrintWriter entries = new PrintWriter(
                     new OutputStreamWriter(new FileOutputStream(ENTRIES_HTML), UTF8))) {
            extractEntries(book, entries);
        }
    }

    /**
     * Streams {@code in} and prints each complete entry, from {@code <idx:entry} through
     * {@code </idx:entry>} inclusive, as one line on {@code out}.
     *
     * <p>Bytes outside entries are discarded, so neither the preamble nor an entry is limited
     * in size. CR and LF bytes inside an entry are replaced by spaces so that every entry stays
     * on a single line. An entry that is still open at end of input is dropped.
     *
     * @param in  book HTML, expected to be UTF-8
     * @param out destination, one entry per line
     * @return number of entries written
     * @throws IOException if reading fails or the writer reports an error
     */
    static int extractEntries(InputStream in, PrintWriter out) throws IOException {
        byte[] open = ENTRY_OPEN.getBytes(UTF8);
        byte[] close = ENTRY_CLOSE.getBytes(UTF8);
        ByteArrayOutputStream entry = new ByteArrayOutputStream();
        boolean insideEntry = false;
        int matched = 0; // leading bytes of the currently sought marker seen so far
        int written = 0;
        int b;
        while ((b = in.read()) != -1) {
            byte[] marker = insideEntry ? close : open;
            if (b == marker[matched]) {
                matched++;
            } else {
                // '<' occurs only at index 0 of both markers, so after a mismatch the only
                // possible partial match is a fresh '<'.
                matched = (b == marker[0]) ? 1 : 0;
            }
            if (insideEntry) {
                entry.write((b == '\n' || b == '\r') ? ' ' : b);
            }
            if (matched == marker.length) {
                matched = 0;
                if (insideEntry) {
                    out.println(new String(entry.toByteArray(), UTF8));
                    written++;
                    insideEntry = false;
                } else {
                    // Start the entry exactly at the opening marker.
                    entry.reset();
                    entry.write(open, 0, open.length);
                    insideEntry = true;
                }
            }
        }
        // PrintWriter never throws on write failures; surface them explicitly.
        if (out.checkError()) {
            throw new IOException("failed to write extracted entries");
        }
        return written;
    }
}

最后用stardict-editor.exe把生成的txt文件编译成字典

经过多次修正,完美地将kindle的现代英汉词典转成了文石max3自带词典可用的格式;
链接: https://pan.baidu.com/s/1s79s9EmoM8M6jHJyBLyjhg 提取码: 3vyg

新增欧陆字典.eudic格式,欧陆字典制作方式参考 https://www.eudic.net/v4/en/home/EudicBuilder

最后修改:2023 年 03 月 11 日
如果觉得我的文章对你有用,请随意赞赏