I have released ‘Standard Tables of Food Composition in Japan 2010′ on Jan. 18, 2012. However, I did not classify which Item_Number is categorized into food groups or derived from any organism.
In this contents, I have described incomplete way how to classify them.
Example 1 shows that the code exports Item_Number, major category, medium category, minor category and details. Please note that the tree structures is not complete.
Example 2 shows complete tree structures. However, I could not write the complete code with recursion.
Example 1. Incomplete data
| Item Number | Major Category | Medium Category | Minor Category | Major Category | Medium Category | Minor Category | 
| 01012 | こむぎ | [玄穀] | 国産 | Wheat | [Whole grain] | Domestic | 
| 01013 | 輸入 | Imported | ||||
| 01014 | 輸入 | Imported | ||||
| 01015 | [小麦粉] | 薄力粉 | [Wheat flour] | Soft flour | ||
| 01016 | [小麦粉] | 薄力粉 | [Wheat flour] | Soft flour | ||
| 01018 | 中力粉 | Medium flour | ||||
| 01019 | 中力粉 | Medium flour | ||||
| 01020 | 強力粉 | Hard flour | ||||
| 01021 | 強力粉 | Hard flour | ||||
| 01023 | 強力粉 | Hard flour | ||||
| 01024 | プレミックス粉 | Premixed flour | ||||
| 01025 | プレミックス粉 | Premixed flour | 
Example 2. Complete data
| Item Number | Major Category | Medium Category | Minor Category | Major Category | MediumCategory | MinorCategory | 
| 01012 | こむぎ | [玄穀] | 国産 | Wheat | [Whole grain] | Domestic | 
| 01013 | こむぎ | [玄穀] | 輸入 | Wheat | [Whole grain] | Imported | 
| 01014 | こむぎ | [玄穀] | 輸入 | Wheat | [Whole grain] | Imported | 
| 01015 | こむぎ | [小麦粉] | 薄力粉 | Wheat | [Wheat flour] | Soft flour | 
| 01016 | こむぎ | [小麦粉] | 薄力粉 | Wheat | [Wheat flour] | Soft flour | 
| 01018 | こむぎ | [小麦粉] | 中力粉 | Wheat | [Wheat flour] | Medium flour | 
| 01019 | こむぎ | [小麦粉] | 中力粉 | Wheat | [Wheat flour] | Medium flour | 
| 01020 | こむぎ | [小麦粉] | 強力粉 | Wheat | [Whole flour] | Hard flour | 
| 01021 | こむぎ | [小麦粉] | 強力粉 | Wheat | [Wheat flour] | Hard flour | 
| 01023 | こむぎ | [小麦粉] | 強力粉 | Wheat | [Wheat flour] | Hard flour | 
| 01024 | こむぎ | [小麦粉] | プレミックス粉 | Wheat | [Wheat flour] | Premixed flour | 
| 01025 | こむぎ | [小麦粉] | プレミックス粉 | Wheat | [Wheat flour] | Premixed flour | 
Please copy text from the PDF files and paste to EXCEL worksheet by such procedure as described in this content. Press ‘Alt’ key and ‘F11’ key to load VBE. Run the following code:
Option Explicit
Sub ItemNum()
Dim mySht           As Worksheet
Dim myRng           As Range
Dim i               As Long
Dim j               As Long
Dim k               As Long
Dim tmpAr           As Variant
Dim myItem()        As String
Dim myNum1()        As String
Dim myNum2()        As String
Dim ItemNumAr()     As String
Dim myCancel()      As String
Dim Cancel_Ar()     As String
Dim myAr()          As String
Dim myAr2()         As String
Dim myGroupNamJP()  As String
Dim myGroupNumJP()  As String
Dim myGroupNamEN()  As String
Dim myGroupNumEN()  As String
Dim GroupAr()       As String
Dim myRegExp1       As Object
Dim myRegExp2       As Object
Dim myStrPtn        As String
Dim myStrPtn2       As String
Const startStrPtn   As String = "^(1\\)|residues)$"
Dim tmpStrJ         As String
Dim tmpStrE         As String
Const endStrPtn     As String = "[0-9]\\)$"
Const JapStrPtn     As String = "([ぁ-ヶ]|[亜-黑])+$"
Dim myStr           As String
Set mySht = ActiveSheet
Set myRng = Application.Intersect(mySht.Range("A:F"), _
                                    mySht.UsedRange)
tmpAr = myRng
Set myRegExp1 = CreateObject("VBScript.RegExp")
myStrPtn = "^[0-9]{5}$"
With myRegExp1
    .Pattern = myStrPtn
    .IgnoreCase = True
    .Global = True
End With
Set myRegExp2 = CreateObject("VBScript.RegExp")
With myRegExp2
    .Pattern = startStrPtn
    .IgnoreCase = True
    .Global = True
End With
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    If myRegExp1.Test(tmpAr(i, 1)) And _
       tmpAr(i, 2) <> "(欠番)" Then
        ReDim Preserve myItem(j)
        ReDim Preserve myNum1(j)
        myItem(j) = tmpAr(i, 1)
        myNum1(j) = i
    Else
        j = j - 1
    End If
    j = j + 1
Next i
ReDim ItemNumAr(j - 1, 2)
    ItemNumAr(LBound(ItemNumAr), 0) = myItem(LBound(ItemNumAr))
    ItemNumAr(LBound(ItemNumAr), 1) = 7
    ItemNumAr(LBound(ItemNumAr), 2) = myNum1(LBound(ItemNumAr))
For k = LBound(ItemNumAr) + 1 To UBound(ItemNumAr)
    ItemNumAr(k, 0) = myItem(k)
    ItemNumAr(k, 1) = myNum1(k - 1) + 1
    ItemNumAr(k, 2) = myNum1(k)
Next k
Erase myItem
Erase myNum1
j = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    If myRegExp2.Test(tmpAr(i, 1)) _
    Then
        ReDim Preserve myItem(j)
        ReDim Preserve myNum1(i)
        myItem(j) = tmpAr(i, 1)
        myNum1(j) = i
    Else
        j = j - 1
    End If
    j = j + 1
Next i
ReDim myCancel(UBound(myItem), 1)
For k = LBound(myCancel) To UBound(myCancel)
    myCancel(k, 0) = myItem(k)
    myCancel(k, 1) = myNum1(k)
Next k
Erase myItem
Erase myNum1
ReDim Preserve myCancel(UBound(myCancel), 2)
j = 0
For i = LBound(myCancel) To UBound(myCancel) - 1
    If myCancel(i, 0) = "1)" Then
        If myCancel(i + 2, 0) = "residues" Then
            myCancel(i, 2) = myCancel(i + 2, 1)
        Else
            myCancel(i, 2) = myCancel(i + 1, 1)
        End If
    Else
        j = j - 1
    End If
    j = j + 1
Next i
ReDim Cancel_Ar(j - 1, 2)
j = 0
For i = LBound(myCancel) To UBound(myCancel) - 1
    If myCancel(i, 0) = "1)" Then
        Cancel_Ar(j, 0) = myCancel(i, 0)
        Cancel_Ar(j, 1) = myCancel(i, 1)
        Cancel_Ar(j, 2) = myCancel(i, 2)
    Else
        j = j - 1
    End If
    j = j + 1
Next i
k = 0
ReDim myItem(k)
ReDim myNum1(k)
ReDim myNum2(k)
For i = LBound(ItemNumAr) To UBound(ItemNumAr)
    ReDim Preserve myItem(k)
    ReDim Preserve myNum1(k)
    ReDim Preserve myNum2(k)
    For j = LBound(Cancel_Ar) To UBound(Cancel_Ar)
        If CLng(ItemNumAr(i, 1)) < CLng(Cancel_Ar(j, 1)) And _
                                   CLng(Cancel_Ar(j, 1)) < CLng(ItemNumAr(i, 2)) And _
           CLng(ItemNumAr(i, 1)) < CLng(Cancel_Ar(j, 2)) And _
                                   CLng(Cancel_Ar(j, 2)) < CLng(ItemNumAr(i, 2)) _
        Then
            If Cancel_Ar(j, 1) - ItemNumAr(i, 1) < 3 Then
                myItem(k) = ItemNumAr(i, 0)
                myNum1(k) = Cancel_Ar(j, 2)
                myNum2(k) = ItemNumAr(i, 2)
            Else
                myNum2(k) = Cancel_Ar(j, 1)
                k = k + 1
                ReDim Preserve myItem(k)
                ReDim Preserve myNum1(k)
                ReDim Preserve myNum2(k)
                myItem(k) = ItemNumAr(i, 0)
                myNum1(k) = Cancel_Ar(j, 2)
                myNum2(k) = ItemNumAr(i, 2)
            End If
        Else
            myItem(k) = ItemNumAr(i, 0)
            myNum1(k) = ItemNumAr(i, 1)
            myNum2(k) = ItemNumAr(i, 2)
        End If
    Next j
    k = k + 1
Next i
ReDim myAr(UBound(myItem), 2)
For i = LBound(myAr) To UBound(myAr)
    myAr(i, 0) = myItem(i)
    myAr(i, 1) = myNum1(i)
    myAr(i, 2) = myNum2(i)
Next i
Erase myItem
Erase myNum1
Erase myNum2
myStrPtn2 = "^(\\[|\\()?[a-zA-Z]+"
With myRegExp1
    .Pattern = myStrPtn2
    .IgnoreCase = True
    .Global = True
End With
k = 0
For i = LBound(tmpAr) To UBound(tmpAr)
    For j = LBound(myAr) To UBound(myAr)
        If CLng(myAr(j, 1)) < i And _
           CLng(myAr(j, 2)) > i And _
           myRegExp1.Test(tmpAr(i, 1)) _
        Then
            ReDim Preserve myGroupNamJP(k)
            ReDim Preserve myGroupNumJP(k)
            ReDim Preserve myGroupNamEN(k)
            ReDim Preserve myGroupNumEN(k)
            myGroupNamJP(k) = tmpAr(i - 1, 1) & _
                           tmpAr(i - 1, 2) & _
                           tmpAr(i - 1, 3) & _
                           tmpAr(i - 1, 4) & _
                           tmpAr(i - 1, 5) & _
                           tmpAr(i - 1, 6)
            myGroupNumJP(k) = i - 1
            myGroupNamEN(k) = RTrim(tmpAr(i, 1) & " " & _
                         Replace(tmpAr(i, 2), "*", "") & " " & _
                         Replace(tmpAr(i, 3), "*", "") & " " & _
                         Replace(tmpAr(i, 4), "*", "") & " " & _
                         Replace(tmpAr(i, 5), "*", "") & " " & _
                         Replace(tmpAr(i, 6), "*", ""))
            myGroupNumEN(k) = i
        Else
            k = k - 1
        End If
        k = k + 1
    Next j
Next i
ReDim GroupAr(UBound(myGroupNamJP), 3)
For i = LBound(GroupAr) To UBound(GroupAr)
    GroupAr(i, 0) = myGroupNamJP(i)
    GroupAr(i, 1) = myGroupNumJP(i)
    GroupAr(i, 2) = myGroupNamEN(i)
    GroupAr(i, 3) = myGroupNumEN(i)
Next i
Erase myGroupNamJP
Erase myGroupNumJP
Erase myGroupNamEN
Erase myGroupNumEN
k = 0
For i = LBound(GroupAr) To UBound(GroupAr)
    ReDim Preserve myGroupNamJP(k)
    ReDim Preserve myGroupNumJP(k)
    ReDim Preserve myGroupNamEN(k)
    ReDim Preserve myGroupNumEN(k)
    myGroupNamJP(k) = GroupAr(i, 0)
    myGroupNumJP(k) = GroupAr(i, 1)
    myGroupNamEN(k) = GroupAr(i, 2)
    myGroupNumEN(k) = GroupAr(i, 3)
    k = k + 1
    For j = LBound(Cancel_Ar) To UBound(Cancel_Ar)
        If CLng(Cancel_Ar(j, 1)) < CLng(GroupAr(i, 1)) And _
           CLng(GroupAr(i, 1)) < CLng(Cancel_Ar(j, 2)) _
        Then
            k = k - 1
        End If
    Next j
Next i
ReDim GroupAr(UBound(myGroupNamJP), 3)
With myRegExp1
    .Pattern = endStrPtn
    .IgnoreCase = True
    .Global = True
End With
With myRegExp2
    .Pattern = JapStrPtn
    .IgnoreCase = True
    .Global = True
End With
For i = LBound(GroupAr) To UBound(GroupAr)
    myGroupNamJP(i) = myRegExp1.Replace(myGroupNamJP(i), "")
    myGroupNamEN(i) = myRegExp1.Replace(myGroupNamEN(i), "")
    myGroupNamEN(i) = RTrim(myRegExp2.Replace(myGroupNamEN(i), ""))
    GroupAr(i, 0) = myGroupNamJP(i)
    GroupAr(i, 1) = myGroupNumJP(i)
    GroupAr(i, 2) = myGroupNamEN(i)
    GroupAr(i, 3) = myGroupNumEN(i)
Next i
ReDim Preserve myAr(UBound(myAr), 5)
ReDim myAr2(UBound(myAr), 3)
For i = LBound(myAr) To UBound(myAr)
    tmpStrJ = ""
    tmpStrE = ""
    myAr2(i, 0) = myAr(i, 0)
    For j = LBound(GroupAr) To UBound(GroupAr)
        If CLng(myAr(i, 1)) < CLng(GroupAr(j, 1)) And _
                              CLng(GroupAr(j, 3)) < CLng(myAr(i, 2)) Then
            tmpStrJ = tmpStrJ & GroupAr(j, 0)
            tmpStrE = RTrim(tmpStrE & " " & GroupAr(j, 2))
            myAr(i, 4) = GroupAr(j, 1)
        End If
    Next j
    If tmpStrJ = "" Then
        myAr(i, 3) = myAr(i - 1, 3)
        myAr(i, 4) = myAr(i - 1, 4)
        myAr(i, 5) = myAr(i - 1, 5)
        myAr2(i, 1) = myAr(i - 1, 3)
        myAr2(i, 2) = myAr(i - 1, 5)
    Else
        myAr(i, 3) = tmpStrJ
        myAr(i, 5) = tmpStrE
        myAr2(i, 1) = tmpStrJ
        myAr2(i, 2) = tmpStrE
    End If
Next i
Set mySht = Worksheets.Add
With mySht
    .Range("A1").Value = "Item_Number"
    .Range("B1").Value = "上位食品名(日)"
    .Range("C1").Value = "上位食品名(英)"
    .Range("A2:C450") = myAr2
End With
Erase ItemNumAr
Erase Cancel_Ar
Erase GroupAr
Erase myAr
Erase myAr2
Set mySht = Nothing
Set myRng = Nothing
Set myRegExp1 = Nothing
Set myRegExp2 = Nothing
End Sub