日本食品標準成分表2010の食品番号をカテゴリー分類する その2

日本食品標準成分表2010』のPDFを.txtファイルに変換するの記事を参考に,ファイル名 “1299012_1.pdf” から “1299012_18.pdf” までのファイルをダウンロードします.PDF1ファイルの全テキストをコピーしてワークシート1枚に貼り付けのオプションでペーストします.テキストファイルウィザード1/3では元のデータ形式で『カンマやタブなどの区切り文字によってフィールドごとに区切られたデータ』を選択します.テキストファイルウィザード2/3では特に変更なく次へ進みます.テキストファイルウィザード3/3では最初のカラムの列のデータ形式のみ『文字列』に変更して完了をクリックします.この作業をPDFファイル分繰り返します.主にA列に対して若干の修正を施します.さらに Webテク実験室 からダウンロードしたブック “成分表2010.xls” のワークシートをコピーし,シート名を “Sheet0” に変更します.このEXCELのブックに “Category.xlsm” と名前を付けて保存します.

日本食品標準成分表2010の食品番号をカテゴリー分類する その1で作成した ”Sample.xlsm” ブックから ”Result” シートを “Category.xlsm” ブックに移動又はコピーします.AltキーとF11キーを押下してVBEを起動します.標準モジュールを挿入し,下記コードを貼り付けて実行して下さい.結果として “M_CATEGORY” という名のシートが生成します.

Option Explicit
Sub Select_Class()
Dim tmpSht              As Worksheet
Dim tmpRng              As Range
Dim tmpArray            As Variant
Dim workArray           As Variant
Dim h                   As Long
Dim i                   As Long
Dim j                   As Long
Dim k                   As Long
Dim l                   As Long
Dim m                   As Long
Dim n                   As Long
Dim p                   As Long
Dim q                   As Long
Dim r                   As Long
Dim RegExp_Japanese     As Object
Dim RegExp_English      As Object
Dim RegExp_ItemNum      As Object
Const PtnJPN            As String = "[^A-Za-z0-9'\.\-\*]{2,}"
Const PtnENG            As String = "^[A-Za-z0-9'\,\.\-\%]+$"
Const PtnItemNum        As String = "^[0-9]{5}$"
Dim Item_Number()       As String
Dim JapaneseItem()      As String
Dim EnglishItem()       As String
Dim EnglishString       As String
Dim JapaneseClass()     As String
Dim English_Class()     As String
Dim ClassStringEN       As String
Dim ItemNumArray()      As String
Dim ItemENGArray()      As String
Dim ClassArrayJP()      As String
Dim ClassArrayEN()      As String
Dim RegExp_AngleBracket As Object
Dim RegExp_RoundStartJP As Object
Dim RegExp_RoundStartEN As Object
Dim RegExp_RoundExitEN  As Object
Const Ptn_Round_Start   As String = "^(\(|()"
Const Ptn_Round_Exit    As String = "(\)|))$"
Dim StringRoundEnglish  As String
Dim SubClassJapanese()  As String
Dim SubClass_English()  As String
Dim RegExp_Square_Start As Object
Dim RegExp_SquareExitEN As Object
Const Ptn_Angle_Start   As String = "^[<<]"
Const Ptn_SquareStart   As String = "^\["
Const Ptn_Square_Exit   As String = "\]$"
Dim MidleClassJP()      As String
Dim MidleClassEN()      As String
Dim StrMidClassENG      As String
Dim SubClass_JPN()      As String
Dim SubClass_ENG()      As String
Dim mySht               As Worksheet
Dim myRng               As Range
Dim myAr                As Variant
Dim workArray2()        As String
Dim workArray3()        As String
Dim mySht2              As Worksheet
Dim myRng2              As Range
Dim myAr2               As Variant
Dim CEREALS             As Long
Dim POTATOES            As Long
Dim SUGARS              As Long
Dim PULSES              As Long
Dim NUTS                As Long
Dim VEGETABLES          As Long
Dim FRUITS              As Long
Dim MUSHROOMS           As Long
Dim ALGAE               As Long
Dim FISHES              As Long
Dim MEATS               As Long
Dim EGGS                As Long
Dim MILK                As Long
Dim OIL                 As Long
Dim CONFECTIONERIES     As Long
Dim BEVERAGES           As Long
Dim SEASONINGS          As Long
Dim PREPARED            As Long
    Set RegExp_Japanese = CreateObject("VBScript.RegExp")
    With RegExp_Japanese
        .Pattern = PtnJPN
        .IgnoreCase = True
        .Global = True
    End With
    Set RegExp_English = CreateObject("VBScript.RegExp")
    With RegExp_English
        .Pattern = PtnENG
        .IgnoreCase = True
        .Global = True
    End With
    Set RegExp_ItemNum = CreateObject("VBScript.RegExp")
    With RegExp_ItemNum
        .Pattern = PtnItemNum
        .IgnoreCase = True
        .Global = True
    End With
    Set RegExp_Square_Start = CreateObject("VBScript.RegExp")
    With RegExp_Square_Start
        .Pattern = Ptn_SquareStart
        .IgnoreCase = True
        .Global = True
    End With
    Set RegExp_SquareExitEN = CreateObject("VBScript.RegExp")
    With RegExp_SquareExitEN
        .Pattern = "[A-Za-z0-9'\,\.\-\%]+" & Ptn_Square_Exit
        .IgnoreCase = True
        .Global = True
    End With
    Set RegExp_RoundStartJP = CreateObject("VBScript.RegExp")
    With RegExp_RoundStartJP
        .Pattern = Ptn_Round_Start & "[^A-Za-z0-9'\.\-\*]{2,}"
        .IgnoreCase = True
        .Global = True
    End With
    Set RegExp_RoundStartEN = CreateObject("VBScript.RegExp")
    With RegExp_RoundStartEN
        .Pattern = Ptn_Round_Start & "[A-Za-z'\,\.\-\%]+"
    End With
    Set RegExp_RoundExitEN = CreateObject("VBScript.RegExp")
    With RegExp_RoundExitEN
        .Pattern = "[A-Za-z0-9'\,\.\-\%]+" & Ptn_Round_Exit
        .IgnoreCase = True
        .Global = True
    End With
j = 0
k = 0
l = 0
m = 0
q = 0
For Each tmpSht In Worksheets
    If tmpSht.Name = "M_CATEGORY" Then
        MsgBox prompt:="M_CATEGORYと同名のシートがあります." & vbCrLf & _
                        "処理を終了します", _
              Buttons:=vbOKOnly, _
                Title:="内部エラー"
        Exit Sub
    End If
    If tmpSht.Name <> "Sheet0" And _
       tmpSht.Name <> "Sheet00" And _
       tmpSht.Name <> "Result" Then
        Set tmpRng = tmpSht.UsedRange
        tmpArray = tmpRng
        workArray = NoCancelArray(tmpArray)
        For h = LBound(workArray) To UBound(workArray)
            For i = workArray(h, 0) To workArray(h, 1)
                On Error Resume Next
                If RegExp_ItemNum.Test(tmpArray(i, 1)) And _
                   tmpArray(i, 2) <> "(欠番)" Then
                    EnglishString = ""
                    ReDim Preserve Item_Number(j)
                    ReDim Preserve JapaneseItem(j)
                    ReDim Preserve EnglishItem(j)
                    For p = 1 To 6
                        If RegExp_English.Test(tmpArray(i + 1, p)) Then
                            EnglishString = EnglishString & " " & tmpArray(i + 1, p)
                            EnglishString = Trim(EnglishString)
                        Else
                            Exit For
                        End If
                    Next p
                    Item_Number(j) = tmpArray(i, 1)
                    JapaneseItem(j) = tmpArray(i, 2)
                    EnglishItem(j) = EnglishString
                    j = j + 1
                End If
                On Error GoTo 0
                If RegExp_Japanese.Test(tmpArray(i, 1)) And _
                   RegExp_English.Test(tmpArray(i + 1, 1)) Then
                    ClassStringEN = ""
                    ReDim Preserve JapaneseClass(k)
                    ReDim Preserve English_Class(k)
                    For p = 1 To 6
                        If RegExp_English.Test(tmpArray(i + 1, p)) Then
                            ClassStringEN = ClassStringEN & " " & tmpArray(i + 1, p)
                            ClassStringEN = Trim(ClassStringEN)
                        Else
                            Exit For
                        End If
                    Next p
                    JapaneseClass(k) = tmpArray(i, 1)
                    English_Class(k) = ClassStringEN
                    k = k + 1
                End If
                If RegExp_Square_Start.Test(tmpArray(i, 1)) And _
                   RegExp_Square_Start.Test(tmpArray(i + 1, 1)) Then
                    StrMidClassENG = ""
                    ReDim Preserve MidleClassJP(l)
                    ReDim Preserve MidleClassEN(l)
                    For p = 1 To 6
                        StrMidClassENG = StrMidClassENG + " " + tmpArray(i + 1, p)
                        StrMidClassENG = Trim(StrMidClassENG)
                        If RegExp_SquareExitEN.Test(tmpArray(i + 1, p)) Then Exit For
                    Next p
                    MidleClassJP(l) = tmpArray(i, 1)
                    MidleClassEN(l) = StrMidClassENG
                    l = l + 1
                End If
                If RegExp_RoundStartJP.Test(tmpArray(i, 1)) And _
                   RegExp_RoundStartEN.Test(tmpArray(i + 1, 1)) Then
                    StringRoundEnglish = ""
                    ReDim Preserve SubClassJapanese(m)
                    ReDim Preserve SubClass_English(m)
                    For p = 1 To 6
                        StringRoundEnglish = StringRoundEnglish & " " & tmpArray(i + 1, p)
                        StringRoundEnglish = Trim(StringRoundEnglish)
                        If RegExp_RoundExitEN.Test(tmpArray(i + 1, p)) Then Exit For
                    Next p
                    tmpArray(i, 1) = Replace(tmpArray(i, 1), "(", "(")
                    tmpArray(i, 1) = Replace(tmpArray(i, 1), ")", ")")
                    SubClassJapanese(m) = tmpArray(i, 1)
                    StringRoundEnglish = Replace(StringRoundEnglish, "(", "(")
                    StringRoundEnglish = Replace(StringRoundEnglish, ")", ")")
                    SubClass_English(m) = StringRoundEnglish
                    m = m + 1
                End If
            Next i
        Next h
        q = q + 1
    End If
Next tmpSht
Set mySht = Worksheets("Sheet0")
Set myRng = Intersect(mySht.Range("A:H"), mySht.UsedRange)
myAr = myRng
ReDim workArray2(UBound(myAr) - 1, 16)
For i = LBound(workArray2) To UBound(workArray2)
    workArray2(i, 0) = myAr(i + 1, 1)
    workArray2(i, 1) = myAr(i + 1, 2)
    workArray2(i, 2) = myAr(i + 1, 3)
    myAr(i + 1, 4) = Replace(myAr(i + 1, 4), "(", "(")
    myAr(i + 1, 4) = Replace(myAr(i + 1, 4), ")", ")")
    workArray2(i, 6) = myAr(i + 1, 4)
    workArray2(i, 8) = myAr(i + 1, 5)
    workArray2(i, 10) = myAr(i + 1, 6)
    workArray2(i, 12) = myAr(i + 1, 7)
    workArray2(i, 14) = myAr(i + 1, 8)
Next i
Set mySht2 = Worksheets("Result")
Set myRng2 = mySht2.UsedRange
myAr2 = myRng2
For i = LBound(workArray2) To UBound(workArray2)
    For k = LBound(JapaneseClass) To UBound(JapaneseClass)
        If workArray2(i, 2) = JapaneseClass(k) Then
           workArray2(i, 3) = English_Class(k)
        End If
        If workArray2(i, 4) = JapaneseClass(k) Then
           workArray2(i, 5) = English_Class(k)
        End If
        If workArray2(i, 8) = JapaneseClass(k) Then
           workArray2(i, 9) = English_Class(k)
        End If
        If workArray2(i, 12) = JapaneseClass(k) Then
           workArray2(i, 13) = English_Class(k)
        End If
    Next k
    For m = LBound(SubClassJapanese) To UBound(SubClassJapanese)
        If workArray2(i, 6) = SubClassJapanese(m) Then
           workArray2(i, 7) = SubClass_English(m)
        End If
    Next m
    For l = UBound(MidleClassJP) To LBound(MidleClassJP) Step -1
        If workArray2(i, 10) = MidleClassJP(l) Then
           workArray2(i, 11) = MidleClassEN(l)
        End If
    Next l
    For r = LBound(myAr2) To UBound(myAr2)
        If workArray2(i, 0) = myAr2(r, 1) Then
            workArray2(i, 4) = myAr2(r, 5)
            On Error Resume Next
            Select Case True
            Case workArray2(i, 0) >= "10001" And workArray2(i, 0) <= "10278"
                 workArray2(i, 4) = "<魚類>"
            Case workArray2(i, 0) >= "10319" And workArray2(i, 0) <= "10341"
                 workArray2(i, 4) = "<えび・かに類>"
            Case workArray2(i, 0) >= "10342" And workArray2(i, 0) <= "10362"
                 workArray2(i, 4) = "<いか・たこ類>"
            Case workArray2(i, 0) >= "10376" And workArray2(i, 0) <= "10388"
                 workArray2(i, 4) = "<水産練り製品>"
            Case workArray2(i, 0) >= "11205" And workArray2(i, 0) <= "11240"
                 workArray2(i, 4) = "<鳥肉類>"
            Case workArray2(i, 0) >= "11245" And workArray2(i, 0) <= "11246"
                 workArray2(i, 4) = "<獣肉類>"
            Case workArray2(i, 0) >= "11247" And workArray2(i, 0) <= "11247"
                 workArray2(i, 4) = "<鳥肉類>"
            Case workArray2(i, 0) >= "13001" And workArray2(i, 0) <= "13050"
                 workArray2(i, 4) = "<牛乳及び乳製品>"
            Case workArray2(i, 0) >= "15001" And workArray2(i, 0) <= "15040"
                 workArray2(i, 4) = "<和生菓子・和半生菓子類>"
            Case workArray2(i, 0) >= "15041" And workArray2(i, 0) <= "15068"
                 workArray2(i, 4) = "<和干菓子類>"
            Case workArray2(i, 0) >= "15069" And workArray2(i, 0) <= "15072"
                 workArray2(i, 4) = "<菓子パン類>"
            Case workArray2(i, 0) >= "15073" And workArray2(i, 0) <= "15085"
                 workArray2(i, 4) = "<ケーキ・ペストリー類>"
            Case workArray2(i, 0) >= "15086" And workArray2(i, 0) <= "15091"
                 workArray2(i, 4) = "<デザート菓子類>"
            Case workArray2(i, 0) >= "15092" And workArray2(i, 0) <= "15100"
                 workArray2(i, 4) = "<ビスケット類>"
            Case workArray2(i, 0) >= "15101" And workArray2(i, 0) <= "15104"
                 workArray2(i, 4) = "<スナック類>"
            Case workArray2(i, 0) >= "15105" And workArray2(i, 0) <= "15113"
                 workArray2(i, 4) = "<キャンデー類>"
            Case workArray2(i, 0) >= "15114" And workArray2(i, 0) <= "15116"
                 workArray2(i, 4) = "<チョコレート類>"
            Case workArray2(i, 0) >= "15117" And workArray2(i, 0) <= "15117"
                 workArray2(i, 4) = "<果実菓子類>"
            Case workArray2(i, 0) >= "15118" And workArray2(i, 0) <= "15120"
                 workArray2(i, 4) = "<チューインガム類>"
            Case workArray2(i, 0) >= "16001" And workArray2(i, 0) <= "16032"
                 workArray2(i, 4) = "<アルコール飲料類>"
            Case workArray2(i, 0) >= "16033" And workArray2(i, 0) <= "16044"
                 workArray2(i, 4) = "<茶類>"
            Case workArray2(i, 0) >= "16045" And workArray2(i, 0) <= "16049"
                 workArray2(i, 4) = "<コーヒー・ココア類>"
            Case workArray2(i, 0) >= "16050" And workArray2(i, 0) <= "16055"
                 workArray2(i, 4) = "<その他>"
            Case workArray2(i, 0) >= "17001" And workArray2(i, 0) <= "17054"
                 workArray2(i, 4) = "<調味料類>"
            Case workArray2(i, 0) >= "17055" And workArray2(i, 0) <= "17081"
                 workArray2(i, 4) = "<香辛料類>"
            Case workArray2(i, 0) >= "17082" And workArray2(i, 0) <= "17084"
                 workArray2(i, 4) = "<その他>"
            End Select
            On Error GoTo 0
            workArray2(i, 5) = myAr2(r, 6)
            On Error Resume Next
            Select Case True
            Case workArray2(i, 0) >= "10001" And workArray2(i, 0) <= "10278"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "10319" And workArray2(i, 0) <= "10341"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "10342" And workArray2(i, 0) <= "10362"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "10376" And workArray2(i, 0) <= "10388"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "11205" And workArray2(i, 0) <= "11240"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "11245" And workArray2(i, 0) <= "11246"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "11247" And workArray2(i, 0) <= "11247"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "13001" And workArray2(i, 0) <= "13050"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15001" And workArray2(i, 0) <= "15040"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15041" And workArray2(i, 0) <= "15068"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15069" And workArray2(i, 0) <= "15072"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15073" And workArray2(i, 0) <= "15085"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15086" And workArray2(i, 0) <= "15091"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15092" And workArray2(i, 0) <= "15100"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15101" And workArray2(i, 0) <= "15104"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15105" And workArray2(i, 0) <= "15113"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15114" And workArray2(i, 0) <= "15116"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "15117" And workArray2(i, 0) <= "15117"
                 workArray2(i, 5) = "<CANDIED FRUITS>"
            Case workArray2(i, 0) >= "15118" And workArray2(i, 0) <= "15120"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "16001" And workArray2(i, 0) <= "16032"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "16033" And workArray2(i, 0) <= "16044"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "16045" And workArray2(i, 0) <= "16049"
                 workArray2(i, 5) = "COFFEES AND COCOAS>"
            Case workArray2(i, 0) >= "16050" And workArray2(i, 0) <= "16055"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "17001" And workArray2(i, 0) <= "17054"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "17055" And workArray2(i, 0) <= "17081"
                 workArray2(i, 5) = ""
            Case workArray2(i, 0) >= "17082" And workArray2(i, 0) <= "17084"
                 workArray2(i, 5) = ""
            End Select
            On Error GoTo 0
            If workArray2(i, 6) <> "" And _
               workArray2(i, 7) = "" Then
                workArray2(i, 7) = myAr2(r, 8)
            End If
            If workArray2(i, 8) <> "" And _
               workArray2(i, 9) = "" Then
                If myAr2(r, 10) = "" Then
                    workArray2(i, 9) = myAr2(r, 15)
                Else
                    workArray2(i, 9) = myAr2(r, 10)
                End If
            End If
            If workArray2(i, 12) <> "" And _
               workArray2(i, 13) = "" Then
                workArray2(i, 13) = myAr2(r, 15)
            End If
            If workArray2(i, 14) <> "" Then
                workArray2(i, 15) = myAr2(r, 15)
            End If
            workArray2(i, 16) = myAr2(r, 11)
        End If
        Select Case True
            Case workArray2(i, 0) = "14004a"
                workArray2(i, 9) = "Safflower oil"
            Case workArray2(i, 0) = "14011a"
                workArray2(i, 9) = "Sunflower oil"
            Case workArray2(i, 0) = "14011b"
                workArray2(i, 9) = "Sunflower oil"
        End Select
    Next r
Next i
ReDim workArray3(UBound(workArray2), UBound(workArray2, 2))
For i = LBound(workArray3) To UBound(workArray3)
    workArray3(i, 0) = workArray2(i, 0)
    workArray3(i, 1) = workArray2(i, 1)
    workArray3(i, 2) = workArray2(i, 2)
    workArray3(i, 3) = workArray2(i, 4)
    workArray3(i, 4) = workArray2(i, 6)
    workArray3(i, 5) = workArray2(i, 8)
    workArray3(i, 6) = workArray2(i, 10)
    workArray3(i, 7) = workArray2(i, 12)
    workArray3(i, 8) = workArray2(i, 14)
    workArray3(i, 9) = workArray2(i, 3)
    workArray3(i, 10) = workArray2(i, 5)
    workArray3(i, 11) = workArray2(i, 7)
    workArray3(i, 12) = workArray2(i, 16)
    workArray3(i, 13) = workArray2(i, 9)
    workArray3(i, 14) = workArray2(i, 11)
    workArray3(i, 15) = workArray2(i, 13)
    workArray3(i, 16) = workArray2(i, 15)
Next i
Set mySht = Worksheets.Add
With mySht
    .Name = "M_CATEGORY"
    .Range("A1").Value = "ItemNumber"
    .Range("B1").Value = "FoodGroupNumber"
    .Range("C1").Value = "FoodGroupJP"
    .Range("D1").Value = "SubGroupJP"
    .Range("E1").Value = "SubCategoryJP"
    .Range("F1").Value = "MajorCategoryJP"
    .Range("G1").Value = "MediumCategoryJP"
    .Range("H1").Value = "MinorCategoryJP"
    .Range("I1").Value = "DetailsJP"
    .Range("J1").Value = "FoodGroupEN"
    .Range("K1").Value = "SubGroupEN"
    .Range("L1").Value = "SubCategoryEN"
    .Range("M1").Value = "AcademicName"
    .Range("N1").Value = "MajorCategoryEN"
    .Range("O1").Value = "MediumCategoryEN"
    .Range("P1").Value = "MinorCategoryEN"
    .Range("Q1").Value = "DetailsEN"
    .Range("A2:Q1892") = workArray3
End With
Set tmpSht = Nothing
Set tmpRng = Nothing
Set tmpArray = Nothing
Set workArray = Nothing
Set RegExp_Japanese = Nothing
Set RegExp_English = Nothing
Set RegExp_ItemNum = Nothing
Set RegExp_Square_Start = Nothing
Set RegExp_SquareExitEN = Nothing
Set RegExp_RoundStartJP = Nothing
Set RegExp_RoundStartEN = Nothing
Set RegExp_RoundExitEN = Nothing
Erase Item_Number()
Erase JapaneseItem()
Erase EnglishItem()
Erase JapaneseClass()
Erase English_Class()
Erase ItemNumArray()
Erase ItemENGArray()
Erase ClassArrayJP()
Erase ClassArrayEN()
Erase SubClassJapanese()
Erase SubClass_English()
Erase MidleClassJP()
Erase MidleClassEN()
Erase SubClass_JPN()
Erase SubClass_ENG()
Erase workArray2()
Erase workArray3()
Set mySht = Nothing
Set myRng = Nothing
Set myAr = Nothing
Set mySht2 = Nothing
Set myRng2 = Nothing
Set myAr2 = Nothing
End Sub

Function NoCancelArray(ByRef Sh As Variant) As Variant
Dim mySht           As Variant
Dim myRng           As Range
Dim tmpAr           As Variant
Dim i               As Long
Dim j               As Long
Dim RegExpCancel    As Object
Dim RegExp_Exit     As Object
Const StrCancel     As String = "^(1\)|residues)$"
Dim CancelItem()    As String
Dim CancelRow1()    As String
Dim CancelRow2()    As String
Dim myCancelAr()    As String
Dim Cancel_Array()  As String
    Set RegExpCancel = CreateObject("VBScript.RegExp")
    With RegExpCancel
        .Pattern = StrCancel
        .IgnoreCase = True
        .Global = True
    End With
tmpAr = Sh
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    If RegExpCancel.Test(tmpAr(i, 1)) Then
        ReDim Preserve CancelItem(j)
        ReDim Preserve CancelRow1(i)
        CancelItem(j) = tmpAr(i, 1)
        CancelRow1(j) = i
        j = j + 1
    End If
Next i
ReDim myCancelAr(UBound(CancelItem), 1)
For j = LBound(myCancelAr) To UBound(myCancelAr)
    myCancelAr(j, 0) = CancelItem(j)
    myCancelAr(j, 1) = CancelRow1(j)
Next j
ReDim Preserve myCancelAr(UBound(myCancelAr), 2)
j = 0
For i = LBound(myCancelAr) To UBound(myCancelAr) - 1
    If myCancelAr(i, 0) = "1)" Then
        If UBound(myCancelAr) >= 2 Then
            If myCancelAr(i + 2, 0) = "residues" Then
                myCancelAr(i, 2) = myCancelAr(i + 2, 1)
            Else
                myCancelAr(i, 2) = myCancelAr(i + 1, 1)
            End If
        Else
            myCancelAr(i, 2) = myCancelAr(i + 1, 1)
        End If
        j = j + 1
    End If
Next i
Erase CancelRow1
j = 0
ReDim CancelRow1(j)
ReDim CancelRow2(j)
CancelRow1(j) = myCancelAr(j, 1)
CancelRow2(j) = myCancelAr(j, 2)
For i = LBound(myCancelAr) + 1 To UBound(myCancelAr)
    If myCancelAr(i, 0) = "1)" And _
       myCancelAr(i - 1, 0) <> "1)" Then
        j = j + 1
        ReDim Preserve CancelRow1(j)
        ReDim Preserve CancelRow2(j)
        CancelRow1(j) = myCancelAr(i, 1)
        CancelRow2(j) = myCancelAr(i, 2)
    End If
Next i
ReDim Cancel_Array(UBound(CancelRow1), 1)
j = 0
For j = LBound(Cancel_Array) To UBound(Cancel_Array)
    Cancel_Array(j, 0) = CancelRow1(j)
    Cancel_Array(j, 1) = CancelRow2(j)
Next j
j = 0
Cancel_Array(j, 0) = 1
Cancel_Array(j, 1) = CancelRow1(j)
For j = LBound(Cancel_Array) + 1 To UBound(Cancel_Array)
    Cancel_Array(j, 0) = CancelRow2(j - 1) + 1
    Cancel_Array(j, 1) = CancelRow1(j) - 1
Next j
NoCancelArray = Cancel_Array
End Function

参照:
日本食品標準成分表2010の食品番号をカテゴリー分類する その1
日本食品標準成分表2010のcsvファイル

日本食品標準成分表2010のcsvファイル

2012年1月12日,日本食品標準成分表2010のテキストデータの記事を投稿しました.その際に’M_FOODS.txt’ファイルに各項目の名称と単位を付記されたいとの依頼があったと追記しました.今回は表頭に各項目名と単位を付記したcsvファイルを作成しましたので公開いたします.

元のPDFファイルでは各要素内にコンマを使用しており,デリミタとしてスペースを使用していました.本ファイルではタブをデリミタとして使用しています.EXCELでは通常ダブルクォーテーションを引用符として用いますが,他のデータベースソフトではシングルクォーテーションを引用符として用いるものもあります.そのため本ファイルでは引用符を使用しておりません.EXCELに取り込む際にはダブルクリックで開かず,必ず’データ’タブの’外部データをインポートする’から’テキストファイル’を選択してください.またテキストファイルウィザードの最後のタブで1列目のデータ型を’文字列’にしてください.以上の注意点を守らない場合,食品番号は通常5桁の数値ですが,先頭の’0’が欠落する場合があります.



本ファイルは54列1881行から成ります.表頭3行はデータ構造を示し,4行目以降が実際のデータです.1行目は日本語の項目名,2行目は英語の項目名,3行目は単位です.gはグラム,mgはミリグラム,microgramはマイクログラムです.

以下の点にご注意ください.

1.「日本食品標準成分表2010」に記載されている,(0),Tr,(Tr),-,について,当データでは「 0 」と表記しています.

2.本表の食品成分値は文部科学省科学技術・学術審議会資源調査分科会報告「日本食品標準成分表2010」によるものです.食品成分値を複製又は転載する場合は事前に文部科学省への許可申請もしくは届け出が必要となる場合があります.

連絡先:文部科学省科学技術・学術政策局政策課資源室 E-mail: kagseis@mext.go.jp

M_FOODS.csv

参照:
日本食品標準成分表2010の食品番号をカテゴリー分類する その1
日本食品標準成分表2010の食品番号をカテゴリー分類する その2

日本食品標準成分表2010の食品番号をカテゴリー分類する その1

日本食品標準成分表2010の食品番号を分類するの記事で食品番号を分類する記事を掲載しましたが,不十分な分類しか出来ておりませんでした.今回は既に日本語の完成した分類を見つけましたので,それを元に英語もつけて分類しました.参考にしたのは以下のファイルです.

資源調査分科会報告「日本食品標準成分表2010」について

新しいブックを用意します.”1299012_1.pdf”から”1299012_18.pdf”までのPDFの全テキストをSheet1にオプションでペーストします.その際,下の行方向に行の間隔を空けずに貼り付けます.テキストファイルウィザードで最初のカラムのデータ形式を『文字列』に変更します.原材料的食品のもととなる生物の学名でダウンロードしたPDFの全テキストを選択し,Sheet2にオプションでペーストします.テキストファイルウィザードが開くので,1/3では元のデータ形式で『カンマやタブなどの区切り文字によってフィールドごとに区切られたデータ』を選択します.テキストファイルウィザード2/3では『連続した区切り文字は1文字とみなす』のチェックを外して次に進みます.テキストファイルウィザード3/3では最初の列のデータ形式を『文字列』に変更して完了をクリックします.このEXCELブックに”Sample.xlsm”と名前を付けて保存します.

“Sample.xlsm”ブックを開き,Alt+F11キーを押下してVBEを起動します.挿入メニューから標準モジュールを選択し,下記のコードを貼り付けます.Separate_by_Parentプロシージャを実行すると”Result”という名前のシートが出来ます.

Option Explicit
Function MajorCategoryAr(ByRef Sh As Worksheet) As String()
Dim mySht               As Worksheet
Dim myRng               As Range
Dim tmpAr               As Variant
Dim StartEnd            As Variant
Dim strFoodGroup        As String
Dim strFoodGroupJP      As String
Dim strFoodGroupEN      As String
Dim strSubFoodGroup     As String
Dim strSubFoodGroupJP   As String
Dim strSubFoodGroupEN   As String
Dim strSub_Category     As String
Dim strSub_CategoryJP   As String
Dim strSub_CategoryEN   As String
Dim strMajor_Category   As String
Dim StartNumber()       As String
Dim Exit_Number()       As String
Dim FoodGroupJP()       As String
Dim FoodGroupEN()       As String
Dim Sub_FoodGroup_JP()  As String
Dim Sub_FoodGroup_EN()  As String
Dim Sub_Category_JPN()  As String
Dim Sub_Category_ENG()  As String
Dim Major_CategoryJP()  As String
Dim Major_CategoryEN()  As String
Dim Major_CategoryLT()  As String
Dim myArray()           As String
Dim i As Long
Dim j As Long
Dim k As Long
Dim n As Long
Dim RegExp_3_Digit_Num  As Object
Dim RegExp_Item_Number  As Object
Dim RegExp_SentakuHanni As Object
Dim RegExp_SubCategory1 As Object
Dim RegExp_SubCategory2 As Object
Dim RegExp_MedCategory  As Object
Dim RegExp_Foods_Group  As Object
Dim RegExp_Jpn_Eng_Mix  As Object
Dim RegExp_JapaneseOnly As Object
Dim RegExp_Upper_Lower  As Object
Dim RegExp_Upper_Only   As Object
Dim RegExp_Lower_Only   As Object
Dim RegExp_RoundBracket As Object
Dim RegExp_SquareBracket    As Object
Dim RegExp_AngleBracket As Object
Dim myMatches           As Object
Dim myMatch             As Object
Const Ptn_3_Digit_Num   As String = "[0-9]{3}$"
Const Ptn_Item_Number   As String = "^[0-9]{5}$"
Const Ptn_SentakuHanni  As String = "(,|~)"
Const Ptn_SubCategory1  As String = "^((|\().()|\))$"
Const Ptn_SubCategory2  As String = "^(<|>)$"
Const Ptn_MedCategory   As String = "^\[.\]$"
Const Ptn_FoodGroupNum  As String = "^([0-9]|[0-9]{2})$"
Const Ptn_Jpn_Eng_Mix   As String = "^[^A-Za-z0-9]+(\([^A-Za-z0-9]+\))?([A-Za-z])"
Const Ptn_JapaneseOnly  As String = "^[^A-Za-z0-9]+(\([^A-Za-z0-9]+\))?$"
Const Ptn_Upper_Lower   As String = "[A-Z][a-z]+"
Const Ptn_Upper_Only    As String = "[A-Z]+"
Const Ptn_Lower_Only    As String = "^[a-z]+$"
Const Ptn_RoundStart    As String = "^[\((]"
Const Ptn_Round_Exit    As String = "[\((][^A-Za-z0-9]+[\))]"
Const Ptn_SquareStart   As String = "^\["
Const Ptn_Square_Exit   As String = "\[[^A-Za-z0-9]+\]"
Const Ptn_AngleStart    As String = "^[\<<]"
Const Ptn_Angle_Exit    As String = "[\<<][^A-Za-z0-9]+[\>>]"
Set mySht = Sh
Set myRng = mySht.UsedRange
tmpAr = myRng
Set RegExp_3_Digit_Num = CreateObject("VBScript.RegExp")
Set RegExp_Item_Number = CreateObject("VBScript.RegExp")
Set RegExp_SentakuHanni = CreateObject("VBScript.RegExp")
Set RegExp_SubCategory1 = CreateObject("VBScript.RegExp")
Set RegExp_SubCategory2 = CreateObject("VBScript.RegExp")
Set RegExp_MedCategory = CreateObject("VBScript.RegExp")
Set RegExp_Foods_Group = CreateObject("VBScript.RegExp")
Set RegExp_Jpn_Eng_Mix = CreateObject("VBScript.RegExp")
Set RegExp_JapaneseOnly = CreateObject("VBScript.RegExp")
Set RegExp_Upper_Lower = CreateObject("VBScript.RegExp")
Set RegExp_Upper_Only = CreateObject("VBScript.RegExp")
Set RegExp_Lower_Only = CreateObject("VBScript.RegExp")
Set RegExp_RoundBracket = CreateObject("VBScript.RegExp")
Set RegExp_SquareBracket = CreateObject("VBScript.RegExp")
Set RegExp_AngleBracket = CreateObject("VBScript.RegExp")
With RegExp_3_Digit_Num
    .Pattern = "[0-9]{3}$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_Item_Number
    .Pattern = "^[0-9]{5}$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_SentakuHanni
    .Pattern = "(,|~)"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_SubCategory1
    .Pattern = "^((|\().()|\))$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_SubCategory2
    .Pattern = "^(<|>)$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_MedCategory
    .Pattern = "^\[.\]$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_Foods_Group
    .Pattern = "^([0-9]|[0-9]{2})$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_Jpn_Eng_Mix
    .Pattern = "^[^A-Za-z0-9]+(\([^A-Za-z0-9]+\))?([A-Za-z])"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_JapaneseOnly
    .Pattern = "^[^A-Za-z0-9]+(\([^A-Za-z0-9]+\))?$"
    .IgnoreCase = True
    .Global = True
End With
With RegExp_Upper_Lower
    .Pattern = "[A-Z][a-z]+"
    .IgnoreCase = False
    .Global = True
End With
With RegExp_Upper_Only
    .Pattern = "[A-Z]+"
    .IgnoreCase = False
    .Global = True
End With
With RegExp_Lower_Only
    .Pattern = "^[a-z]+$"
    .IgnoreCase = False
    .Global = True
End With
j = 0
For i = LBound(tmpAr) + 1 To UBound(tmpAr)
    With RegExp_RoundBracket
        .Pattern = Ptn_RoundStart
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_SquareBracket
        .Pattern = Ptn_SquareStart
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_AngleBracket
        .Pattern = Ptn_AngleStart
        .IgnoreCase = True
        .Global = True
    End With
    strFoodGroup = ""
    strSubFoodGroup = ""
    strSub_Category = ""
    strMajor_Category = ""
    ReDim Preserve StartNumber(j)
    ReDim Preserve Exit_Number(j)
    ReDim Preserve FoodGroupJP(j)
    ReDim Preserve FoodGroupEN(j)
    ReDim Preserve Sub_FoodGroup_JP(j)
    ReDim Preserve Sub_FoodGroup_EN(j)
    ReDim Preserve Sub_Category_JPN(j)
    ReDim Preserve Sub_Category_ENG(j)
    ReDim Preserve Major_CategoryJP(j)
    ReDim Preserve Major_CategoryEN(j)
    ReDim Preserve Major_CategoryLT(j)
    If RegExp_3_Digit_Num.Test(tmpAr(i, 1)) Then
        Select Case True
        Case RegExp_Item_Number.Test(tmpAr(i, 1))
            StartNumber(j) = tmpAr(i, 1)
            Exit_Number(j) = tmpAr(i, 1)
        Case RegExp_SentakuHanni.Test(tmpAr(i, 1))
            StartEnd = StartExit(tmpAr(i, 1))
            StartNumber(j) = StartEnd(0)
            Exit_Number(j) = StartEnd(1)
            Erase StartEnd
        End Select
        FoodGroupJP(j) = strFoodGroupJP
        FoodGroupEN(j) = strFoodGroupEN
        If (i >= 19 And i <= 27) _
        Or (i >= 370 And i <= 596) _
        Or (i >= 599 And i <= 626) _
        Or (i >= 635 And i <= 639) _
        Or (i >= 646 And i <= 668) _
        Then
            Sub_FoodGroup_JP(j) = strSubFoodGroupJP
            Sub_FoodGroup_EN(j) = strSubFoodGroupEN
        End If
        If tmpAr(i, 2) = "" Then
            Sub_Category_JPN(j) = strSub_CategoryJP
            Sub_Category_ENG(j) = strSub_CategoryEN
        End If
        For k = 2 To 8
            strMajor_Category = strMajor_Category & " " & tmpAr(i, k)
        Next k
        strMajor_Category = Trim(strMajor_Category)
        On Error Resume Next
        For k = 1 To 8
            If RegExp_Lower_Only.Test(tmpAr(i + 1, 1)) _
            And Not RegExp_SubCategory1.Test(tmpAr(i + 1, 1)) _
            And Not RegExp_SubCategory2.Test(tmpAr(i + 1, 1)) _
            And Not RegExp_Foods_Group.Test(tmpAr(i + 1, 1)) _
            And Not RegExp_3_Digit_Num.Test(tmpAr(i + 1, 1)) _
            Then
                strMajor_Category = strMajor_Category & " " & tmpAr(i + 1, k)
            End If
        Next k
        On Error GoTo 0
        strMajor_Category = Trim(strMajor_Category)
        If RegExp_Jpn_Eng_Mix.Test(strMajor_Category) Then
            StartEnd = Separate_Jpn_Eng(strMajor_Category)
            Major_CategoryJP(j) = StartEnd(0)
            Erase StartEnd
            Set myMatches = RegExp_Upper_Lower.Execute(strMajor_Category)
            Major_CategoryEN(j) = Mid(strMajor_Category, _
                                    myMatches.Item(0).firstindex + 1, _
                                    myMatches.Item(myMatches.Count - 1).firstindex _
                                  - myMatches.Item(0).firstindex - 1)
            Set myMatch = myMatches.Item(myMatches.Count - 1)
            Major_CategoryLT(j) = Mid(strMajor_Category, myMatch.firstindex + 1)
        Else
        End If
    Else
        Select Case True
        Case RegExp_Foods_Group.Test(tmpAr(i, 1))
            For k = 2 To 8
                strFoodGroup = strFoodGroup & " " & tmpAr(i, k)
            Next k
            strFoodGroup = Trim(strFoodGroup)
            Select Case True
            Case RegExp_Jpn_Eng_Mix.Test(strFoodGroup)
                Set myMatches = RegExp_Jpn_Eng_Mix.Execute(strFoodGroup)
                Set myMatch = myMatches.Item(0)
                strFoodGroupJP = Left(strFoodGroup, myMatches.Item(0).Length - 1)
                strFoodGroupEN = Mid(strFoodGroup, myMatches.Item(0).Length)
            Case RegExp_JapaneseOnly.Test(strFoodGroup)
                Set myMatches = RegExp_JapaneseOnly.Execute(strFoodGroup)
                Set myMatch = myMatches.Item(0)
                strFoodGroupJP = Left(strFoodGroup, myMatches.Item(0).Length - 1)
                strFoodGroupEN = Mid(strFoodGroup, myMatches.Item(0).Length)
            Case Else
            End Select
        Case RegExp_AngleBracket.Test(tmpAr(i, 1))
            For k = 1 To 8
                strSubFoodGroup = strSubFoodGroup & " " & tmpAr(i, k)
            Next k
            strSubFoodGroup = Trim(strSubFoodGroup)
            With RegExp_AngleBracket
                .Pattern = Ptn_Angle_Exit
                .IgnoreCase = True
                .Global = True
            End With
            Set myMatches = RegExp_AngleBracket.Execute(strSubFoodGroup)
            strSubFoodGroupJP = myMatches.Item(0).Value
            strSubFoodGroupEN = Mid(strSubFoodGroup, myMatches.Item(0).Length + 2)
            strSubFoodGroupEN = Replace(strSubFoodGroupEN, "<", "<")
            strSubFoodGroupEN = Replace(strSubFoodGroupEN, ">", ">")
        Case RegExp_RoundBracket.Test(tmpAr(i, 1))
            For k = 1 To 8
                strSub_Category = strSub_Category & " " & tmpAr(i, k)
            Next k
            strSub_Category = Trim(strSub_Category)
            With RegExp_RoundBracket
                .Pattern = Ptn_Round_Exit
                .IgnoreCase = True
                .Global = True
            End With
            Set myMatches = RegExp_RoundBracket.Execute(strSub_Category)
            On Error Resume Next
            strSub_CategoryJP = myMatches.Item(0).Value
            strSub_CategoryJP = Replace(strSub_CategoryJP, "(", "(")
            strSub_CategoryJP = Replace(strSub_CategoryJP, ")", ")")
            strSub_CategoryEN = Mid(strSub_Category, myMatches.Item(0).Length + 2)
            strSub_CategoryEN = Replace(strSub_CategoryEN, "(", "(")
            strSub_CategoryEN = Replace(strSub_CategoryEN, ")", ")")
            On Error GoTo 0
        Case Else
        End Select
        j = j - 1
    End If
    j = j + 1
Next i
ReDim myArray(UBound(StartNumber), 10)
For n = LBound(myArray) To UBound(myArray)
    myArray(n, 0) = StartNumber(n)
    myArray(n, 1) = Exit_Number(n)
    myArray(n, 2) = FoodGroupJP(n)
    myArray(n, 3) = FoodGroupEN(n)
    myArray(n, 4) = Sub_FoodGroup_JP(n)
    myArray(n, 5) = Sub_FoodGroup_EN(n)
    myArray(n, 6) = Sub_Category_JPN(n)
    myArray(n, 7) = Sub_Category_ENG(n)
    myArray(n, 8) = Major_CategoryJP(n)
    myArray(n, 9) = Major_CategoryEN(n)
    myArray(n, 10) = Major_CategoryLT(n)
Next n
MajorCategoryAr = myArray
End Function

Function StartExit(ByVal InputStr As String) As String()
    Dim str     As String
    Dim Ar()    As String
    str = InputStr
    ReDim Ar(1)
    Ar(0) = Left(str, 5)
    Ar(1) = Left(str, 2) & Right(str, 3)
    StartExit = Ar
End Function

Function Separate_Jpn_Eng(ByVal InputStr As String) As String()
    Dim str                 As String
    Dim Ar()                As String
    Dim RegExp_Jpn_Eng_Mix  As Object
    Dim myMatches           As Object
    Dim myMatch             As Object
    Const Ptn_Jpn_Eng_Mix   As String = "^[^A-Za-z0-9]+(\([^A-Za-z0-9]+\))?([A-Za-z])"
    str = InputStr
    ReDim Ar(1)
    Set RegExp_Jpn_Eng_Mix = CreateObject("VBScript.RegExp")
    With RegExp_Jpn_Eng_Mix
        .Pattern = Ptn_Jpn_Eng_Mix
        .IgnoreCase = True
        .Global = True
    End With
    Set myMatches = RegExp_Jpn_Eng_Mix.Execute(str)
    For Each myMatch In myMatches
        If myMatches.Count > 0 Then
            Ar(0) = Left(str, myMatches.Item(0).Length - 1)
            Ar(1) = Mid(str, myMatches.Item(0).Length)
        End If
    Next myMatch
    Separate_Jpn_Eng = Ar
End Function

Sub Separate_by_Parent()
Dim mySht1              As Worksheet
Dim mySht2              As Worksheet
Dim mySht3              As Worksheet
Dim myRng               As Range
Dim tmpAr               As Variant
Dim Major_CategoryAr    As Variant
Dim No_Cancel_Ar        As Variant
Dim ItemNamAr()         As String
Dim ItemNumAr()         As String
Dim JapaneseName()      As String
Dim English_Name()      As String
Dim ItemArray()         As String
Dim Residual_JPN()      As String
Dim Residual_ENG()      As String
Dim Residual_Row()      As String
Dim i                   As Long
Dim j                   As Long
Dim k                   As Long
Dim m                   As Long
Dim n                   As Long
Dim p                   As Long
Dim q                   As Long
Dim r                   As Long
Dim s                   As Long
Dim t                   As Long
Dim str_JPN_Analyse     As String
Dim str_ENG_Analyse     As String
Dim strFoodGroup        As String
Dim strFoodGroupJP      As String
Dim strFoodGroupEN      As String
Dim strSubFoodGroup     As String
Dim strSubFoodGroupJP   As String
Dim strSubFoodGroupEN   As String
Dim strSub_Category     As String
Dim strSub_CategoryJP   As String
Dim strSub_CategoryEN   As String
Dim strMajor_Category   As String
Dim strMajor_CategoryJP As String
Dim strMajor_CategoryEN As String
Dim strMediumCategory   As String
Dim strMediumCategoryJP As String
Dim strMediumCategoryEN As String
Dim strMinor_Category   As String
Dim strMinor_CategoryJP As String
Dim strMinor_CategoryEN As String
Dim strDetailCategory   As String
Dim strDetailCategoryJP As String
Dim strDetailCategoryEN As String
Dim FoodGrouNum()       As String
Dim FoodGroupJP()       As String
Dim FoodGroupEN()       As String
Dim Sub_FoodGroup_JP()  As String
Dim Sub_FoodGroup_EN()  As String
Dim Sub_Group_JP_Row()  As String
Dim Sub_Group_EN_Row()  As String
Dim Sub_Category_JPN()  As String
Dim Sub_Category_ENG()  As String
Dim SubCategory_RowJ()  As String
Dim SubCategory_RowE()  As String
Dim Major_CategoryJP()  As String
Dim Major_CategoryEN()  As String
Dim Major_CategoryLT()  As String
Dim Major_JPN_RowNum()  As String
Dim Major_ENG_RowNum()  As String
Dim Major_Temp_Array()  As String
Dim MediumCategoryJP()  As String
Dim MediumCategoryEN()  As String
Dim Med_JP_RowNumber()  As Long
Dim Med_EN_RowNumber()  As Long
Dim Med_Category_JPN()  As String
Dim Med_Category_ENG()  As String
Dim MediumCategoryAr()  As String
Dim Minor_CategoryJP()  As String
Dim Minor_CategoryEN()  As String
Dim Min_JP_RowNumber()  As Long
Dim Min_EN_RowNumber()  As Long
Dim Min_Category_JPN()  As String
Dim Min_Category_ENG()  As String
Dim Minor_CategoryAr()  As String
Dim DetailCategoryJP()  As String
Dim DetailCategoryEN()  As String
Const Ptn_FoodGroupNum  As String = "^([0-9]|[0-9]{2})$"
Const Ptn_Jpn_Eng_Mix   As String = "^[^A-Za-z0-9]+(\([^A-Za-z0-9]+\))?([A-Za-z])"
Const Ptn_JapaneseOnly  As String = "^[^A-Za-z0-9\*]+(\([^A-Za-z0-9]+\))?$"
Const Ptn_Upper_Lower   As String = "[A-Za-z\s:\-,]+" '"[A-Za-z,\s]+"
Const Ptn_Upper_Only    As String = "[A-Z]+"
Const Ptn_Lower_Only    As String = "^[a-z]+$"
Const Ptn_AngleStart    As String = "^[\<<]"
Const Ptn_Angle_JPN     As String = "[<<].+[>>]"
Const Ptn_Angle_ENG     As String = "[<<].+[>>]"
Const Ptn_RoundStart    As String = "^[\((][^0-9]+"
Const Ptn_Round_JPN     As String = "[\((][^A-Za-z0-9]+[\))]"
Const Ptn_Round_ENG     As String = "[\((][A-Za-z\s]+[\))]"
Const Ptn_SquareStart   As String = "^\["
Const Ptn_Square_JPN    As String = "\[[^A-Za-z0-9]+\]"
Const Ptn_Square_ENG    As String = "\[[A-Za-z\s:\-,]+(\]|])"
Dim RegExp_MedCategory      As Object
Dim RegExp_Foods_Group      As Object
Dim RegExp_Jpn_Eng_Mix      As Object
Dim RegExp_JapaneseOnly     As Object
Dim RegExp_English_Only     As Object
Dim RegExp_Upper_Lower      As Object
Dim RegExp_Upper_Only       As Object
Dim RegExp_Lower_Only       As Object
Dim RegExp_Angle_Bracket    As Object
Dim RegExp_Angle_Bracket_JP As Object
Dim RegExp_Angle_Bracket_EN As Object
Dim RegExp_Round_Bracket    As Object
Dim RegExp_Round_Bracket_JP As Object
Dim RegExp_Round_Bracket_EN As Object
Dim RegExp_SquareBracket    As Object
Dim RegExp_SquareBracket_JP As Object
Dim RegExp_SquareBracket_EN As Object
Dim RegExp_5_Number     As Object
Dim RegExp_Japanese     As Object
Dim RegExp_Alphabet     As Object
Dim myMatches           As Object
Dim myMatch             As Object
Const Ptn_5_Number      As String = "^[0-9]{5}$"
Const Ptn_Japanese      As String = "[^A-Za-z0-9]{2,}"
Const Ptn_Alphabet      As String = "^[A-Za-z]{2,}"
Dim CEREALS             As Long
Dim POTATOES            As Long
Dim SUGARS              As Long
Dim PULSES              As Long
Dim NUTS                As Long
Dim VEGETABLES          As Long
Dim FRUITS              As Long
Dim MUSHROOMS           As Long
Dim ALGAE               As Long
Dim FISHES              As Long
Dim MEATS               As Long
Dim EGGS                As Long
Dim MILK                As Long
Dim OIL                 As Long
Dim CONFECTIONERIES     As Long
Dim BEVERAGES           As Long
Dim SEASONINGS          As Long
Dim PREPARED            As Long
Dim RegExpJapaneseName  As Object
Const Ptn_JapaneseName  As String = "^([0-9%]{1,3})?[^A-Za-z0-9]+"
Set RegExpJapaneseName = CreateObject("VBScript.RegExp")
With RegExpJapaneseName
    .Pattern = Ptn_JapaneseName
    .IgnoreCase = True
    .Global = True
End With
Dim RegExp_EnglishName  As Object
Dim Ptn_EnglishName   As String
Ptn_EnglishName = "^[A-Za-z0-9%\.,\-'" & ChrW(&HC0) & "-" & ChrW(&HFF) & "]+$"
Set RegExp_EnglishName = CreateObject("VBScript.RegExp")
With RegExp_EnglishName
    .Pattern = Ptn_EnglishName
    .IgnoreCase = True
    .Global = True
End With
Set RegExp_5_Number = CreateObject("VBScript.RegExp")
Set RegExp_MedCategory = CreateObject("VBScript.RegExp")
Set RegExp_Foods_Group = CreateObject("VBScript.RegExp")
Set RegExp_Jpn_Eng_Mix = CreateObject("VBScript.RegExp")
Set RegExp_JapaneseOnly = CreateObject("VBScript.RegExp")
Set RegExp_English_Only = CreateObject("VBScript.RegExp")
Set RegExp_Upper_Lower = CreateObject("VBScript.RegExp")
Set RegExp_Upper_Only = CreateObject("VBScript.RegExp")
Set RegExp_Lower_Only = CreateObject("VBScript.RegExp")
Set RegExp_Angle_Bracket = CreateObject("VBScript.RegExp")
Set RegExp_Angle_Bracket_JP = CreateObject("VBScript.RegExp")
Set RegExp_Angle_Bracket_EN = CreateObject("VBScript.RegExp")
Set RegExp_Round_Bracket = CreateObject("VBScript.RegExp")
Set RegExp_Round_Bracket_JP = CreateObject("VBScript.RegExp")
Set RegExp_Round_Bracket_EN = CreateObject("VBScript.RegExp")
Set RegExp_SquareBracket = CreateObject("VBScript.RegExp")
Set RegExp_SquareBracket_JP = CreateObject("VBScript.RegExp")
Set RegExp_SquareBracket_EN = CreateObject("VBScript.RegExp")
Set RegExp_Japanese = CreateObject("VBScript.RegExp")
Set RegExp_Alphabet = CreateObject("VBScript.RegExp")
    With RegExp_5_Number
        .Pattern = Ptn_5_Number
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Angle_Bracket
        .Pattern = Ptn_AngleStart
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Angle_Bracket_JP
        .Pattern = Ptn_Angle_JPN
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Angle_Bracket_EN
        .Pattern = Ptn_Angle_ENG
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Round_Bracket
        .Pattern = Ptn_RoundStart
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Round_Bracket_JP
        .Pattern = Ptn_Round_JPN
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Round_Bracket_EN
        .Pattern = Ptn_Round_ENG
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_SquareBracket
        .Pattern = Ptn_SquareStart
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_SquareBracket_JP
        .Pattern = Ptn_Square_JPN
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_SquareBracket_EN
        .Pattern = Ptn_Square_ENG
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Japanese
        .Pattern = Ptn_Japanese
        .IgnoreCase = True
        .Global = True
    End With
    With RegExp_Alphabet
        .Pattern = Ptn_Alphabet
        .IgnoreCase = False
        .Global = True
    End With
    With RegExp_JapaneseOnly
        .Pattern = Ptn_JapaneseOnly
        .IgnoreCase = True
        .Global = True
    End With
Set mySht1 = Worksheets("Sheet1")
Set mySht2 = Worksheets("Sheet2")
Set myRng = mySht1.UsedRange
tmpAr = myRng
Major_CategoryAr = MajorCategoryAr(mySht2)
ReDim Preserve Major_CategoryAr(UBound(Major_CategoryAr), UBound(Major_CategoryAr, 2) + 2)
m = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    For n = LBound(Major_CategoryAr) To UBound(Major_CategoryAr)
        If Major_CategoryAr(n, 0) = tmpAr(i, 1) Then
            Major_CategoryAr(n, 11) = i
        End If
        If Major_CategoryAr(n, 1) = tmpAr(i, 1) Then
            Major_CategoryAr(n, 12) = i
        End If
    Next n
Next i
m = 0
n = 0
p = 0
q = 0
No_Cancel_Ar = NoCancelArray(mySht1)
For r = LBound(No_Cancel_Ar) To UBound(No_Cancel_Ar)
For i = No_Cancel_Ar(r, 0) To No_Cancel_Ar(r, 1)
        str_JPN_Analyse = ""
        str_ENG_Analyse = ""
        On Error Resume Next
        For k = 1 To 5
            str_JPN_Analyse = str_JPN_Analyse & tmpAr(i, k)
            str_ENG_Analyse = str_ENG_Analyse & " " & tmpAr(i + 1, k)
            str_ENG_Analyse = Replace(str_ENG_Analyse, "  ", " ")
        Next k
        For k = 1 To 3
            str_ENG_Analyse = str_ENG_Analyse & " " & tmpAr(i + 2, k)
            str_ENG_Analyse = Replace(str_ENG_Analyse, "  ", " ")
        Next k
        On Error GoTo 0
        str_ENG_Analyse = Trim(str_ENG_Analyse)
        Select Case True
        Case RegExp_Angle_Bracket.Test(str_JPN_Analyse) And _
             RegExp_Angle_Bracket.Test(str_ENG_Analyse)
            ReDim Preserve Sub_FoodGroup_JP(p)
            ReDim Preserve Sub_FoodGroup_EN(p)
            ReDim Preserve Sub_Group_JP_Row(p)
            ReDim Preserve Sub_Group_EN_Row(p)
            Set myMatches = RegExp_Angle_Bracket_JP.Execute(str_JPN_Analyse)
            Sub_FoodGroup_JP(p) = myMatches.Item(0).Value
            Sub_Group_JP_Row(p) = i
            Set myMatches = RegExp_Angle_Bracket_EN.Execute(str_ENG_Analyse)
            Sub_FoodGroup_EN(p) = myMatches.Item(0).Value
            Sub_FoodGroup_EN(p) = Replace(Sub_FoodGroup_EN(p), "<", "<")
            Sub_FoodGroup_EN(p) = Replace(Sub_FoodGroup_EN(p), ">", ">")
            Sub_Group_EN_Row(p) = i + 1
            p = p + 1
        Case RegExp_Round_Bracket_JP.Test(str_JPN_Analyse) And _
             RegExp_Round_Bracket_EN.Test(str_ENG_Analyse)
            ReDim Preserve Sub_Category_JPN(n)
            ReDim Preserve Sub_Category_ENG(n)
            ReDim Preserve SubCategory_RowJ(n)
            ReDim Preserve SubCategory_RowE(n)
            Set myMatches = RegExp_Round_Bracket_JP.Execute(str_JPN_Analyse)
            Sub_Category_JPN(n) = myMatches.Item(0).Value
            Sub_Category_JPN(n) = Replace(Sub_Category_JPN(n), "(", "(")
            Sub_Category_JPN(n) = Replace(Sub_Category_JPN(n), ")", ")")
            SubCategory_RowJ(n) = i
            Set myMatches = RegExp_Round_Bracket_EN.Execute(str_ENG_Analyse)
            Sub_Category_ENG(n) = myMatches.Item(0).Value
            Sub_Category_ENG(n) = Replace(Sub_Category_ENG(n), "(", "(")
            Sub_Category_ENG(n) = Replace(Sub_Category_ENG(n), ")", ")")
            SubCategory_RowE(n) = i + 1
            n = n + 1
        Case RegExp_SquareBracket_JP.Test(str_JPN_Analyse) And _
             RegExp_SquareBracket_EN.Test(str_ENG_Analyse)
            ReDim Preserve MediumCategoryJP(m)
            ReDim Preserve Med_JP_RowNumber(m)
            ReDim Preserve MediumCategoryEN(m)
            ReDim Preserve Med_EN_RowNumber(m)
            Set myMatches = RegExp_SquareBracket_JP.Execute(str_JPN_Analyse)
            MediumCategoryJP(m) = myMatches.Item(0).Value
            Med_JP_RowNumber(m) = i
            Set myMatches = RegExp_SquareBracket_EN.Execute(str_ENG_Analyse)
            MediumCategoryEN(m) = myMatches.Item(0).Value
            Med_EN_RowNumber(m) = i + 1
            m = m + 1
        Case RegExp_Japanese.Test(str_JPN_Analyse) And _
             RegExp_Alphabet.Test(str_ENG_Analyse)
            ReDim Preserve Major_CategoryJP(q)
            ReDim Preserve Major_CategoryEN(q)
            ReDim Preserve Major_JPN_RowNum(q)
            ReDim Preserve Major_ENG_RowNum(q)
            Set myMatches = RegExp_Japanese.Execute(str_JPN_Analyse)
            Major_CategoryJP(q) = myMatches.Item(0).Value
            Major_JPN_RowNum(q) = i
            Set myMatches = RegExp_Alphabet.Execute(str_ENG_Analyse)
            Major_CategoryEN(q) = myMatches.Item(0).Value
            Major_ENG_RowNum(q) = i + 1
            q = q + 1
        Case Else
        End Select
Next i
Next r
ReDim Major_Temp_Array(UBound(Major_CategoryJP), 5)
For q = LBound(Major_Temp_Array) To UBound(Major_Temp_Array) - 1
    Major_Temp_Array(q, 0) = Major_CategoryJP(q)
    Major_Temp_Array(q, 1) = Major_JPN_RowNum(q)
    Major_Temp_Array(q, 2) = Major_JPN_RowNum(q + 1)
    Major_Temp_Array(q, 3) = Major_CategoryEN(q)
    Major_Temp_Array(q, 4) = Major_ENG_RowNum(q)
    Major_Temp_Array(q, 5) = Major_ENG_RowNum(q + 1)
Next q
    Major_Temp_Array(q, 0) = Major_CategoryJP(UBound(Major_Temp_Array))
    Major_Temp_Array(q, 1) = Major_JPN_RowNum(UBound(Major_Temp_Array))
    Major_Temp_Array(q, 2) = 32757
    Major_Temp_Array(q, 3) = Major_CategoryEN(UBound(Major_Temp_Array))
    Major_Temp_Array(q, 4) = Major_ENG_RowNum(UBound(Major_Temp_Array))
    Major_Temp_Array(q, 5) = 32757
ReDim MediumCategoryAr(UBound(MediumCategoryJP), 5)
For m = LBound(MediumCategoryAr) To UBound(MediumCategoryAr) - 1
    MediumCategoryAr(m, 0) = MediumCategoryJP(m)
    MediumCategoryAr(m, 1) = Med_JP_RowNumber(m)
    MediumCategoryAr(m, 2) = Med_JP_RowNumber(m + 1)
    MediumCategoryAr(m, 3) = MediumCategoryEN(m)
    MediumCategoryAr(m, 4) = Med_EN_RowNumber(m)
    MediumCategoryAr(m, 5) = Med_EN_RowNumber(m + 1)
Next m
    MediumCategoryAr(m, 0) = MediumCategoryJP(UBound(MediumCategoryAr))
    MediumCategoryAr(m, 1) = Med_JP_RowNumber(UBound(MediumCategoryAr))
    MediumCategoryAr(m, 2) = 26271
    MediumCategoryAr(m, 3) = MediumCategoryEN(UBound(MediumCategoryAr))
    MediumCategoryAr(m, 4) = Med_EN_RowNumber(UBound(MediumCategoryAr))
    MediumCategoryAr(m, 5) = 26271
For m = LBound(MediumCategoryAr) To UBound(MediumCategoryAr)
    For n = LBound(Major_CategoryAr) To UBound(Major_CategoryAr)
        If CLng(MediumCategoryAr(m, 1)) > CLng(Major_CategoryAr(n, 11)) And _
           CLng(MediumCategoryAr(m, 1)) < CLng(Major_CategoryAr(n, 12)) And _
           CLng(Major_CategoryAr(n, 12)) < CLng(MediumCategoryAr(m, 2)) Then
            MediumCategoryAr(m, 2) = Major_CategoryAr(n, 12)
        End If
        If CLng(MediumCategoryAr(m, 4)) > CLng(Major_CategoryAr(n, 11)) And _
           CLng(MediumCategoryAr(m, 4)) < CLng(Major_CategoryAr(n, 12)) And _
           CLng(Major_CategoryAr(n, 12)) < CLng(MediumCategoryAr(m, 5)) Then
            MediumCategoryAr(m, 5) = Major_CategoryAr(n, 12)
            Exit For
        End If
    Next n
Next m
p = 0
For i = LBound(tmpAr) To UBound(tmpAr) - 1
    strMinor_CategoryJP = ""
    strMinor_CategoryEN = ""
    For n = LBound(Major_CategoryAr) To UBound(Major_CategoryAr)
        If RegExp_JapaneseOnly.Test(tmpAr(i, 1)) And _
       Not RegExp_5_Number.Test(tmpAr(i, 1)) And _
       Not RegExp_Round_Bracket.Test(tmpAr(i, 1)) And _
       Not RegExp_SquareBracket.Test(tmpAr(i, 1)) And _
       Not RegExp_Angle_Bracket.Test(tmpAr(i, 1)) And _
           InStr(tmpAr(i, 1), Major_CategoryAr(n, 8)) <> 0 And _
           InStr(tmpAr(i + 1, 1), Major_CategoryAr(n, 9)) <> 0 And _
           i >= Major_CategoryAr(n, 11) And _
           i <= Major_CategoryAr(n, 12) Then
            ReDim Preserve Minor_CategoryJP(p)
            ReDim Preserve Minor_CategoryEN(p)
            ReDim Preserve Min_JP_RowNumber(p)
            ReDim Preserve Min_EN_RowNumber(p)
            For k = 1 To 2
                strMinor_CategoryJP = strMinor_CategoryJP & tmpAr(i, k)
                strMinor_CategoryEN = strMinor_CategoryEN & " " & tmpAr(i + 1, k)
                strMinor_CategoryEN = Trim(strMinor_CategoryEN)
            Next k
            Set myMatches = RegExp_JapaneseOnly.Execute(strMinor_CategoryJP)
            Minor_CategoryJP(p) = strMinor_CategoryJP
            Min_JP_RowNumber(p) = i
            Set myMatches = RegExp_Upper_Lower.Execute(strMinor_CategoryEN)
            Minor_CategoryEN(p) = strMinor_CategoryEN
            Min_EN_RowNumber(p) = i + 1
            p = p + 1
        Else
        End If
    Next n
Next i
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    strFoodGroup = ""
    strSubFoodGroup = ""
    strSub_Category = ""
    strMajor_Category = ""
    strMinor_Category = ""
    strDetailCategory = ""
    If RegExp_5_Number.Test(tmpAr(i, 1)) And tmpAr(i, 2) <> "(欠番)" Then
        ReDim Preserve ItemNamAr(j)
        ReDim Preserve ItemNumAr(j)
        ReDim Preserve FoodGrouNum(j)
        ReDim Preserve FoodGroupJP(j)
        ReDim Preserve FoodGroupEN(j)
        ReDim Preserve Sub_FoodGroup_JP(j)
        ReDim Preserve Sub_FoodGroup_EN(j)
        ReDim Preserve Sub_Category_JPN(j)
        ReDim Preserve Sub_Category_ENG(j)
        ReDim Preserve Major_CategoryJP(j)
        ReDim Preserve Major_CategoryEN(j)
        ReDim Preserve Major_CategoryLT(j)
        ReDim Preserve Med_Category_JPN(j)
        ReDim Preserve Med_Category_ENG(j)
        ReDim Preserve Minor_CategoryJP(j)
        ReDim Preserve Minor_CategoryEN(j)
        ReDim Preserve DetailCategoryJP(j)
        ReDim Preserve DetailCategoryEN(j)
        ReDim Preserve JapaneseName(j)
        ReDim Preserve English_Name(j)
        ItemNamAr(j) = tmpAr(i, 1)
        ItemNumAr(j) = i
        Select Case True
            Case Left(tmpAr(i, 1), 2) = "01"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "穀類"
                FoodGroupEN(j) = "CEREALS"
                CEREALS = CEREALS + 1
            Case Left(tmpAr(i, 1), 2) = "02"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "いも及びでん粉類"
                FoodGroupEN(j) = "POTATOES AND STARCHES"
                POTATOES = POTATOES + 1
            Case Left(tmpAr(i, 1), 2) = "03"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "砂糖及び甘味類"
                FoodGroupEN(j) = "SUGARS"
                SUGARS = SUGARS + 1
            Case Left(tmpAr(i, 1), 2) = "04"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "豆類"
                FoodGroupEN(j) = "PULSES"
                PULSES = PULSES + 1
            Case Left(tmpAr(i, 1), 2) = "05"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "種実類"
                FoodGroupEN(j) = "NUTS AND SEEDS"
                NUTS = NUTS + 1
            Case Left(tmpAr(i, 1), 2) = "06"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "野菜類"
                FoodGroupEN(j) = "VEGETABLES"
                VEGETABLES = VEGETABLES + 1
            Case Left(tmpAr(i, 1), 2) = "07"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "果実類"
                FoodGroupEN(j) = "FRUITS"
                FRUITS = FRUITS + 1
            Case Left(tmpAr(i, 1), 2) = "08"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "きのこ類"
                FoodGroupEN(j) = "MUSHROOMS"
                MUSHROOMS = MUSHROOMS + 1
            Case Left(tmpAr(i, 1), 2) = "09"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "藻類"
                FoodGroupEN(j) = "ALGAE"
                ALGAE = ALGAE + 1
            Case Left(tmpAr(i, 1), 2) = "10"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "魚介類"
                FoodGroupEN(j) = "FISHES AND SHELLFISHES"
                FISHES = FISHES + 1
            Case Left(tmpAr(i, 1), 2) = "11"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "肉類"
                FoodGroupEN(j) = "MEATS"
                MEATS = MEATS + 1
            Case Left(tmpAr(i, 1), 2) = "12"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "卵類"
                FoodGroupEN(j) = "EGGS"
                EGGS = EGGS + 1
            Case Left(tmpAr(i, 1), 2) = "13"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "乳類"
                FoodGroupEN(j) = "MILKS"
                MILK = MILK + 1
            Case Left(tmpAr(i, 1), 2) = "14"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "油脂類"
                FoodGroupEN(j) = "FATS AND OILS"
                OIL = OIL + 1
            Case Left(tmpAr(i, 1), 2) = "15"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "菓子類"
                FoodGroupEN(j) = "CONFECTIONERIES"
                CONFECTIONERIES = CONFECTIONERIES + 1
            Case Left(tmpAr(i, 1), 2) = "16"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "し好飲料類"
                FoodGroupEN(j) = "BEVERAGES"
                BEVERAGES = BEVERAGES + 1
            Case Left(tmpAr(i, 1), 2) = "17"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "調味料及び香辛料類"
                FoodGroupEN(j) = "SEASONINGS AND SPICES"
                SEASONINGS = SEASONINGS + 1
            Case Left(tmpAr(i, 1), 2) = "18"
                FoodGrouNum(j) = Left(tmpAr(i, 1), 2)
                FoodGroupJP(j) = "調理加工食品類"
                FoodGroupEN(j) = "PREPARED FOODS"
                PREPARED = PREPARED + 1
            Case Else
        End Select
        If RegExpJapaneseName.Test(tmpAr(i, 2)) Then
            Set myMatches = RegExpJapaneseName.Execute(tmpAr(i, 2))
            JapaneseName(j) = myMatches.Item(0).Value
        End If
        For t = 1 To 6
            If RegExp_EnglishName.Test(tmpAr(i + 1, t)) Then
                English_Name(j) = English_Name(j) & " " & tmpAr(i + 1, t)
                English_Name(j) = Trim(English_Name(j))
            Else
                Exit For
            End If
        Next t
        For k = LBound(Major_CategoryAr) To UBound(Major_CategoryAr)
            If CLng(tmpAr(i, 1)) >= CLng(Major_CategoryAr(k, 0)) _
           And CLng(tmpAr(i, 1)) <= CLng(Major_CategoryAr(k, 1)) Then
                Sub_FoodGroup_JP(j) = Major_CategoryAr(k, 4)
                Sub_FoodGroup_EN(j) = Major_CategoryAr(k, 5)
                Sub_Category_JPN(j) = Major_CategoryAr(k, 6)
                Sub_Category_ENG(j) = Major_CategoryAr(k, 7)
                Major_CategoryJP(j) = Major_CategoryAr(k, 8)
                Major_CategoryEN(j) = Major_CategoryAr(k, 9)
                Major_CategoryLT(j) = Major_CategoryAr(k, 10)
                For m = LBound(MediumCategoryAr) To UBound(MediumCategoryAr)
                    If i >= CLng(MediumCategoryAr(m, 1)) And _
                       i <= CLng(MediumCategoryAr(m, 2)) Then
                        Med_Category_JPN(j) = MediumCategoryAr(m, 0)
                    End If
                    If i >= CLng(MediumCategoryAr(m, 4)) And _
                       i <= CLng(MediumCategoryAr(m, 5)) Then
                        Med_Category_ENG(j) = MediumCategoryAr(m, 3)
                    End If
                Next m
            Else
            End If
        Next k
    Else
        j = j - 1
    End If
    j = j + 1
Next i
ReDim ItemArray(UBound(ItemNamAr), 14)
For i = LBound(ItemArray) To UBound(ItemArray)
    ItemArray(i, 0) = ItemNamAr(i)
    ItemArray(i, 1) = FoodGrouNum(i)
    ItemArray(i, 2) = FoodGroupJP(i)
    ItemArray(i, 3) = FoodGroupEN(i)
    ItemArray(i, 4) = Sub_FoodGroup_JP(i)
    ItemArray(i, 5) = Sub_FoodGroup_EN(i)
    ItemArray(i, 6) = Sub_Category_JPN(i)
    ItemArray(i, 7) = Sub_Category_ENG(i)
    ItemArray(i, 8) = Major_CategoryJP(i)
    ItemArray(i, 9) = Major_CategoryEN(i)
    ItemArray(i, 10) = Major_CategoryLT(i)
    ItemArray(i, 11) = Med_Category_JPN(i)
    ItemArray(i, 12) = Med_Category_ENG(i)
    ItemArray(i, 13) = JapaneseName(i)
    ItemArray(i, 14) = English_Name(i)
Next i
Set mySht3 = Worksheets.Add
With mySht3
    .Name = "Result"
    .Range("A1").Value = "ItemNumber"
    .Range("B1").Value = "食品群番号"
    .Range("C1").Value = "食品群"
    .Range("D1").Value = "FoodGroup"
    .Range("E1").Value = "副分類"
    .Range("F1").Value = "SubFoodGroup"
    .Range("G1").Value = "区分"
    .Range("H1").Value = "SubCategory"
    .Range("I1").Value = "大分類"
    .Range("J1").Value = "MajorCategory"
    .Range("K1").Value = "AcademicName"
    .Range("L1").Value = "中分類"
    .Range("M1").Value = "MediumCategory"
    .Range("N1").Value = "小分類・細分"
    .Range("O1").Value = "MinorCategory_Details"
    .Range("A2:O1879").Value = ItemArray
End With
End Sub

Function NoCancelArray(ByRef Sh As Worksheet) As Variant
Dim mySht           As Worksheet
Dim myRng           As Range
Dim tmpAr           As Variant
Dim i               As Long
Dim j               As Long
Dim RegExpCancel    As Object
Dim RegExp_Exit     As Object
Const StrCancel     As String = "^(1\)|residues)$"
Dim CancelItem()    As String
Dim CancelRow1()    As String
Dim CancelRow2()    As String
Dim myCancelAr()    As String
Dim Cancel_Array()  As String
Set RegExpCancel = CreateObject("VBScript.RegExp")
With RegExpCancel
    .Pattern = StrCancel
    .IgnoreCase = True
    .Global = True
End With
Set mySht = Sh
Set myRng = mySht.UsedRange
tmpAr = myRng
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    If RegExpCancel.Test(tmpAr(i, 1)) Then
        ReDim Preserve CancelItem(j)
        ReDim Preserve CancelRow1(i)
        CancelItem(j) = tmpAr(i, 1)
        CancelRow1(j) = i
        j = j + 1
    End If
Next i
ReDim myCancelAr(UBound(CancelItem), 1)
For j = LBound(myCancelAr) To UBound(myCancelAr)
    myCancelAr(j, 0) = CancelItem(j)
    myCancelAr(j, 1) = CancelRow1(j)
Next j
ReDim Preserve myCancelAr(UBound(myCancelAr), 2)
j = 0
For i = LBound(myCancelAr) To UBound(myCancelAr) - 1
    If myCancelAr(i, 0) = "1)" Then
        If myCancelAr(i + 2, 0) = "residues" Then
            myCancelAr(i, 2) = myCancelAr(i + 2, 1)
        Else
            myCancelAr(i, 2) = myCancelAr(i + 1, 1)
        End If
        j = j + 1
    End If
Next i
Erase CancelRow1
j = 0
ReDim CancelRow1(j)
ReDim CancelRow2(j)
CancelRow1(j) = myCancelAr(j, 1)
CancelRow2(j) = myCancelAr(j, 2)
For i = LBound(myCancelAr) + 1 To UBound(myCancelAr)
    If myCancelAr(i, 0) = "1)" And _
       myCancelAr(i - 1, 0) <> "1)" Then
        j = j + 1
        ReDim Preserve CancelRow1(j)
        ReDim Preserve CancelRow2(j)
        CancelRow1(j) = myCancelAr(i, 1)
        CancelRow2(j) = myCancelAr(i, 2)
    End If
Next i
ReDim Cancel_Array(UBound(CancelRow1), 1)
j = 0
For j = LBound(Cancel_Array) To UBound(Cancel_Array)
    Cancel_Array(j, 0) = CancelRow1(j)
    Cancel_Array(j, 1) = CancelRow2(j)
Next j
j = 0
Cancel_Array(j, 0) = 1
Cancel_Array(j, 1) = CancelRow1(j)
For j = LBound(Cancel_Array) + 1 To UBound(Cancel_Array)
    Cancel_Array(j, 0) = CancelRow2(j - 1)
    Cancel_Array(j, 1) = CancelRow1(j)
Next j
NoCancelArray = Cancel_Array
End Function

参照:
日本食品標準成分表2010のcsvファイル
日本食品標準成分表2010の食品番号をカテゴリー分類する その2

日本食品標準成分表2010のテキストデータ

文部科学省科学技術・学術政策局政策課資源室に表題のファイルの公開を申請しておりましたが,1月13日決裁が終了したとの連絡がありましたので,公開いたします.なお,公開したファイルを利用するにあたっては下記の2点についてご注意下さい.

1.「日本食品標準成分表2010」に記載されている,(0),Tr,(Tr),-,について,当データでは「 0 」と表記しています.

2.本表の食品成分値は文部科学省科学技術・学術審議会資源調査分科会報告「日本食品標準成分表2010」によるものです.食品成分値を複製又は転載する場合は事前に文部科学省への許可申請もしくは届け出が必要となる場合があります.

連絡先:文部科学省科学技術・学術政策局政策課資源室 E-mail: kagseis@mext.go.jp

M_FOODS.csv

決裁にあたり,各項目の名称と単位とを付記されたいとの依頼が電話でありました.後日修正してアップロードします.

参照:
日本食品標準成分表2010のcsvファイル

『日本食品標準成分表2010』のPDFを.txtファイルに変換する

今回は文部科学省のサイトにある『日本食品標準成分表2010』のPDFファイルから,約1800種類に及ぶ食品毎の栄養組成を抽出し,txtファイルに変換します.

資源調査分科会報告「日本食品標準成分表2010」について



以下の流れで処理を行います.

  1. PDFファイルのダウンロード
  2. テキストをコピーする
  3. EXCELに貼付ける
  4. VBAによる処理

1. PDFファイルのダウンロード

上記リンク先にあるPDFファイルをダウンロードします.ファイル名は1299012_1.pdfから1299012_18.pdfまでです.

2. テキストをコピーする

ドキュメント内の全テキストを選択するには,表示/ページ表示/単一ページ以外にして下さい.その上で『全てを選択』してコピーします.単一ページ表示ですと,表示しているページだけのテキストが選択されます.
テキストの選択およびコピー

3. EXCELに貼付ける

貼り付けのオプション/テキストファイルウィザードを使用を選択します.テキストファイルウィザード1/3では元のデータ形式で『カンマやタブなどの区切り文字によってフィールドごとに区切られたデータ』を選択します.テキストファイルウィザード2/3では特に変更なく次へ進みます.テキストファイルウィザード3/3では最初のカラムの列のデータ形式のみ『文字列』に変更して完了をクリックします.




4. VBAによる処理

Alt+F11キーを押下してVBEを起動します.挿入メニューから標準モジュールを選択し,下記のコードを貼り付けます.

Option Explicit
Sub Procedure1()
Dim mySht           As Worksheet
Dim myAr1           As Variant
Dim myAr2           As Variant
Dim i               As Long
Dim j               As Long
Dim k               As Long
Dim myAr(1878, 53)  As String
Dim RegEx1          As Object
Dim Match1          As Object
Dim Matches1        As Object
Dim strPtn1         As String
Dim tempStr         As String
Dim RegEx2          As Object
Dim Match2          As Object
Dim Matches2        As Object
Dim strPtn2         As String
Set mySht = ActiveSheet
Set myAr1 = Application.Intersect(mySht.UsedRange, mySht.Range("A:B"))
Set myAr2 = mySht.UsedRange
Set RegEx1 = CreateObject("VBScript.RegExp")
Set RegEx2 = CreateObject("VBScript.RegExp")
strPtn2 = "^[0-9]+(\.[0-9]*)?"
i = 0
For k = 1 To myAr1.Rows.Count
    If Len(myAr1.Cells(k, 1).Value) = 5 And _
        myAr1.Cells(k, 1).Value >= "01001" And _
        myAr1.Cells(k, 1).Value <= "18016" And _
        myAr1.Cells(k, 2) <> "(欠番)" Then
        myAr(i, 0) = myAr2.Cells(k, 1)
        strPtn1 = "^[0-9]+$"
        RegEx1.Pattern = strPtn1
        RegEx1.IgnoreCase = True
        RegEx1.Global = True
        Set Matches1 = RegEx1.Execute(myAr2.Cells(k, 2))
        If Matches1.Count > 0 Then
            tempStr = myAr2.Cells(k, 2) & myAr2.Cells(k, 3)
            strPtn1 = "[^0-9][0-9]+$"
            RegEx1.Pattern = strPtn1
            RegEx1.IgnoreCase = True
            RegEx1.Global = True
            Set Matches1 = RegEx1.Execute(tempStr)
            If Matches1.Count > 0 Then
                Set Match1 = Matches1.Item(Matches1.Count - 1)
                myAr(i, 1) = Left(tempStr, Match1.firstindex + 1)
                myAr(i, 2) = Mid(tempStr, Match1.firstindex + 2)
                For j = 3 To 52
                    Select Case True
                    Case myAr2.Cells(k, j + 1).Value = "-"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "Tr"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "(Tr)"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "(0)"
                        myAr(i, j) = 0
                    Case Else
                        myAr(i, j) = myAr2.Cells(k, j + 1).Value
                    End Select
                Next j
                RegEx2.Pattern = strPtn2
                RegEx2.IgnoreCase = True
                RegEx2.Global = True
                Set Matches2 = RegEx2.Execute(myAr2.Cells(k, 54))
                On Error Resume Next
                Set Match2 = Matches2.Item(0)
                On Error GoTo 0
                myAr(i, 53) = Match2.Value
            Else
                For j = 1 To 52
                    Select Case True
                    Case myAr2.Cells(k, j + 1).Value = "-"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "Tr"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "(Tr)"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "(0)"
                        myAr(i, j) = 0
                    Case Else
                        myAr(i, j) = myAr2.Cells(k, j + 1).Value
                    End Select
                Next j
                    RegEx2.Pattern = strPtn2
                    RegEx2.IgnoreCase = True
                    RegEx2.Global = True
                    Set Matches2 = RegEx2.Execute(myAr2.Cells(k, 54))
                    On Error Resume Next
                    Set Match2 = Matches2.Item(0)
                    On Error GoTo 0
                    myAr(i, 53) = Match2.Value
            End If
        Else
            strPtn1 = "[^0-9][0-9]+$"
            RegEx1.Pattern = strPtn1
            RegEx1.IgnoreCase = True
            RegEx1.Global = True
            Set Matches1 = RegEx1.Execute(myAr2.Cells(k, 2))
            If Matches1.Count > 0 Then
                Set Match1 = Matches1.Item(Matches1.Count - 1)
                myAr(i, 1) = Left(myAr2.Cells(k, 2), Match1.firstindex + 1)
                myAr(i, 2) = Mid(myAr2.Cells(k, 2), Match1.firstindex + 2)
                For j = 3 To 52
                    Select Case True
                    Case myAr2.Cells(k, j).Value = "-"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j).Value = "Tr"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j).Value = "(Tr)"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j).Value = "(0)"
                        myAr(i, j) = 0
                    Case Else
                        myAr(i, j) = myAr2.Cells(k, j).Value
                    End Select
                Next j
                RegEx2.Pattern = strPtn2
                RegEx2.IgnoreCase = True
                RegEx2.Global = True
                Set Matches2 = RegEx2.Execute(myAr2.Cells(k, 53))
                On Error Resume Next
                Set Match2 = Matches2.Item(0)
                On Error GoTo 0
                myAr(i, 53) = Match2.Value
            Else
                For j = 1 To 52
                    Select Case True
                    Case myAr2.Cells(k, j + 1).Value = "-"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "Tr"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "(Tr)"
                        myAr(i, j) = 0
                    Case myAr2.Cells(k, j + 1).Value = "(0)"
                        myAr(i, j) = 0
                    Case Else
                        myAr(i, j) = myAr2.Cells(k, j + 1).Value
                    End Select
                Next j
                    RegEx2.Pattern = strPtn2
                    RegEx2.IgnoreCase = True
                    RegEx2.Global = True
                    Set Matches2 = RegEx2.Execute(myAr2.Cells(k, 54))
                    On Error Resume Next
                    Set Match2 = Matches2.Item(0)
                    On Error GoTo 0
                    myAr(i, 53) = Match2.Value
            End If
        End If
    Else
        i = i - 1
    End If
    i = i + 1
Next k
Set mySht = Worksheets.Add
With mySht
    .Name = myAr2.Cells(1, 1).Value & myAr2.Cells(1, 2).Value
    .Range("A1:BB1878").Value = myAr
End With
Set mySht = Nothing
Set myAr1 = Nothing
Set myAr2 = Nothing
Set RegEx1 = Nothing
Set Match1 = Nothing
Set Matches1 = Nothing
Set RegEx2 = Nothing
Set Match2 = Nothing
Set Matches2 = Nothing
End Sub

以上の処理をPDFファイルの数だけ繰り返します.更に追加されたワークシート以外のワークシートを削除し,以下のコードを実行して.txtファイルにすべてのレコードを出力します.

Sub AllSheets_to_TextFile()
Dim myBook          As Workbook
Dim mySht           As Worksheet
Dim tmpSht          As Worksheet
Dim myRng           As Range
Dim myAr(1877, 53)  As String
Dim tempAr          As Variant
Dim i               As Long
Dim j               As Long
Dim k               As Long
Dim GOF             As Variant
Dim RegExp          As Object
Dim Matches         As Object
Dim Match           As Object
Dim strPtn          As String
Set RegExp = CreateObject("VBScript.RegExp")
strPtn = "\\"
k = 0
For Each tmpSht In Worksheets
    Set myRng = tmpSht.Range("A1").CurrentRegion
    tempAr = myRng
    For i = LBound(tempAr) To UBound(tempAr)
        For j = LBound(tempAr, 2) To UBound(tempAr, 2)
            myAr(k, j - 1) = tempAr(i, j)
        Next j
        k = k + 1
    Next i
Next tmpSht
GOF = Application.GetOpenFilename(FileFilter:="PDF file,*.pdf", _
                                  Title:="Select PDF file", _
                                  MultiSelect:=False)
If TypeName(GOF) = "Boolean" Then Exit Sub
GOF = Left(GOF, Len(GOF) - 4) & ".txt"
With RegExp
    .Pattern = strPtn
    .IgnoreCase = True
    .Global = True
End With
Set Matches = RegExp.Execute(GOF)
GOF = Left(GOF, Matches.Item(Matches.Count - 1).firstindex) & "\M_FOODS.txt"
Set mySht = Worksheets.Add
With mySht
    .Name = "M_FOODS"
    .Range("A1:BB1878") = myAr
    .Move
End With
ActiveWorkbook.SaveAs Filename:=GOF, _
                      FileFormat:=xlText, _
                      CreateBackup:=False
Set myBook = ActiveWorkbook
Application.DisplayAlerts = False
myBook.Close
Application.DisplayAlerts = True
Set Match = Nothing
Set Matches = Nothing
Set RegExp = Nothing
Set myRng = Nothing
Set mySht = Nothing
Set tmpSht = Nothing
Set myBook = Nothing
End Sub

なお,文部科学省は著作権を理由にデータの複製を行う際には連絡するよう連絡先を示しています.この記事を書いた2011年11月26日時点ではまだ連絡しておりませんので,やり方だけ公開します.