I have released ‘Standard Tables of Food Composition in Japan 2010′ on Jan. 18, 2012. However, I did not classify which Item_Number is categorized into food groups or derived from any organism.
In this contents, I have described incomplete way how to classify them.
Example 1 shows that the code exports Item_Number, major category, medium category, minor category and details. Please note that the tree structures is not complete.
Example 2 shows complete tree structures. However, I could not write the complete code with recursion.
Example 1. Incomplete data
Item Number | Major Category | Medium Category | Minor Category | Major Category | Medium Category | Minor Category |
01012 | こむぎ | [玄穀] | 国産 | Wheat | [Whole grain] | Domestic |
01013 | 輸入 | Imported | ||||
01014 | 輸入 | Imported | ||||
01015 | [小麦粉] | 薄力粉 | [Wheat flour] | Soft flour | ||
01016 | [小麦粉] | 薄力粉 | [Wheat flour] | Soft flour | ||
01018 | 中力粉 | Medium flour | ||||
01019 | 中力粉 | Medium flour | ||||
01020 | 強力粉 | Hard flour | ||||
01021 | 強力粉 | Hard flour | ||||
01023 | 強力粉 | Hard flour | ||||
01024 | プレミックス粉 | Premixed flour | ||||
01025 | プレミックス粉 | Premixed flour |
Example 2. Complete data
Item Number | Major Category | Medium Category | Minor Category | Major Category | MediumCategory | MinorCategory |
01012 | こむぎ | [玄穀] | 国産 | Wheat | [Whole grain] | Domestic |
01013 | こむぎ | [玄穀] | 輸入 | Wheat | [Whole grain] | Imported |
01014 | こむぎ | [玄穀] | 輸入 | Wheat | [Whole grain] | Imported |
01015 | こむぎ | [小麦粉] | 薄力粉 | Wheat | [Wheat flour] | Soft flour |
01016 | こむぎ | [小麦粉] | 薄力粉 | Wheat | [Wheat flour] | Soft flour |
01018 | こむぎ | [小麦粉] | 中力粉 | Wheat | [Wheat flour] | Medium flour |
01019 | こむぎ | [小麦粉] | 中力粉 | Wheat | [Wheat flour] | Medium flour |
01020 | こむぎ | [小麦粉] | 強力粉 | Wheat | [Whole flour] | Hard flour |
01021 | こむぎ | [小麦粉] | 強力粉 | Wheat | [Wheat flour] | Hard flour |
01023 | こむぎ | [小麦粉] | 強力粉 | Wheat | [Wheat flour] | Hard flour |
01024 | こむぎ | [小麦粉] | プレミックス粉 | Wheat | [Wheat flour] | Premixed flour |
01025 | こむぎ | [小麦粉] | プレミックス粉 | Wheat | [Wheat flour] | Premixed flour |
Please copy text from the PDF files and paste to EXCEL worksheet by such procedure as described in this content. Press ‘Alt’ key and ‘F11’ key to load VBE. Run the following code:
Option Explicit Sub ItemNum() Dim mySht As Worksheet Dim myRng As Range Dim i As Long Dim j As Long Dim k As Long Dim tmpAr As Variant Dim myItem() As String Dim myNum1() As String Dim myNum2() As String Dim ItemNumAr() As String Dim myCancel() As String Dim Cancel_Ar() As String Dim myAr() As String Dim myAr2() As String Dim myGroupNamJP() As String Dim myGroupNumJP() As String Dim myGroupNamEN() As String Dim myGroupNumEN() As String Dim GroupAr() As String Dim myRegExp1 As Object Dim myRegExp2 As Object Dim myStrPtn As String Dim myStrPtn2 As String Const startStrPtn As String = "^(1\\)|residues)$" Dim tmpStrJ As String Dim tmpStrE As String Const endStrPtn As String = "[0-9]\\)$" Const JapStrPtn As String = "([ぁ-ヶ]|[亜-黑])+$" Dim myStr As String Set mySht = ActiveSheet Set myRng = Application.Intersect(mySht.Range("A:F"), _ mySht.UsedRange) tmpAr = myRng Set myRegExp1 = CreateObject("VBScript.RegExp") myStrPtn = "^[0-9]{5}$" With myRegExp1 .Pattern = myStrPtn .IgnoreCase = True .Global = True End With Set myRegExp2 = CreateObject("VBScript.RegExp") With myRegExp2 .Pattern = startStrPtn .IgnoreCase = True .Global = True End With j = 0 For i = LBound(tmpAr) To UBound(tmpAr) If myRegExp1.Test(tmpAr(i, 1)) And _ tmpAr(i, 2) <> "(欠番)" Then ReDim Preserve myItem(j) ReDim Preserve myNum1(j) myItem(j) = tmpAr(i, 1) myNum1(j) = i Else j = j - 1 End If j = j + 1 Next i ReDim ItemNumAr(j - 1, 2) ItemNumAr(LBound(ItemNumAr), 0) = myItem(LBound(ItemNumAr)) ItemNumAr(LBound(ItemNumAr), 1) = 7 ItemNumAr(LBound(ItemNumAr), 2) = myNum1(LBound(ItemNumAr)) For k = LBound(ItemNumAr) + 1 To UBound(ItemNumAr) ItemNumAr(k, 0) = myItem(k) ItemNumAr(k, 1) = myNum1(k - 1) + 1 ItemNumAr(k, 2) = myNum1(k) Next k Erase myItem Erase myNum1 j = 0 For i = LBound(tmpAr) To UBound(tmpAr) If myRegExp2.Test(tmpAr(i, 1)) _ Then ReDim Preserve myItem(j) ReDim Preserve myNum1(i) myItem(j) = tmpAr(i, 1) myNum1(j) = i Else j = j - 1 End If j = j + 1 Next i ReDim myCancel(UBound(myItem), 1) For k = LBound(myCancel) To UBound(myCancel) myCancel(k, 0) = myItem(k) myCancel(k, 1) = myNum1(k) Next k Erase myItem Erase myNum1 ReDim Preserve myCancel(UBound(myCancel), 2) j = 0 For i = LBound(myCancel) To UBound(myCancel) - 1 If myCancel(i, 0) = "1)" Then If myCancel(i + 2, 0) = "residues" Then myCancel(i, 2) = myCancel(i + 2, 1) Else myCancel(i, 2) = myCancel(i + 1, 1) End If Else j = j - 1 End If j = j + 1 Next i ReDim Cancel_Ar(j - 1, 2) j = 0 For i = LBound(myCancel) To UBound(myCancel) - 1 If myCancel(i, 0) = "1)" Then Cancel_Ar(j, 0) = myCancel(i, 0) Cancel_Ar(j, 1) = myCancel(i, 1) Cancel_Ar(j, 2) = myCancel(i, 2) Else j = j - 1 End If j = j + 1 Next i k = 0 ReDim myItem(k) ReDim myNum1(k) ReDim myNum2(k) For i = LBound(ItemNumAr) To UBound(ItemNumAr) ReDim Preserve myItem(k) ReDim Preserve myNum1(k) ReDim Preserve myNum2(k) For j = LBound(Cancel_Ar) To UBound(Cancel_Ar) If CLng(ItemNumAr(i, 1)) < CLng(Cancel_Ar(j, 1)) And _ CLng(Cancel_Ar(j, 1)) < CLng(ItemNumAr(i, 2)) And _ CLng(ItemNumAr(i, 1)) < CLng(Cancel_Ar(j, 2)) And _ CLng(Cancel_Ar(j, 2)) < CLng(ItemNumAr(i, 2)) _ Then If Cancel_Ar(j, 1) - ItemNumAr(i, 1) < 3 Then myItem(k) = ItemNumAr(i, 0) myNum1(k) = Cancel_Ar(j, 2) myNum2(k) = ItemNumAr(i, 2) Else myNum2(k) = Cancel_Ar(j, 1) k = k + 1 ReDim Preserve myItem(k) ReDim Preserve myNum1(k) ReDim Preserve myNum2(k) myItem(k) = ItemNumAr(i, 0) myNum1(k) = Cancel_Ar(j, 2) myNum2(k) = ItemNumAr(i, 2) End If Else myItem(k) = ItemNumAr(i, 0) myNum1(k) = ItemNumAr(i, 1) myNum2(k) = ItemNumAr(i, 2) End If Next j k = k + 1 Next i ReDim myAr(UBound(myItem), 2) For i = LBound(myAr) To UBound(myAr) myAr(i, 0) = myItem(i) myAr(i, 1) = myNum1(i) myAr(i, 2) = myNum2(i) Next i Erase myItem Erase myNum1 Erase myNum2 myStrPtn2 = "^(\\[|\\()?[a-zA-Z]+" With myRegExp1 .Pattern = myStrPtn2 .IgnoreCase = True .Global = True End With k = 0 For i = LBound(tmpAr) To UBound(tmpAr) For j = LBound(myAr) To UBound(myAr) If CLng(myAr(j, 1)) < i And _ CLng(myAr(j, 2)) > i And _ myRegExp1.Test(tmpAr(i, 1)) _ Then ReDim Preserve myGroupNamJP(k) ReDim Preserve myGroupNumJP(k) ReDim Preserve myGroupNamEN(k) ReDim Preserve myGroupNumEN(k) myGroupNamJP(k) = tmpAr(i - 1, 1) & _ tmpAr(i - 1, 2) & _ tmpAr(i - 1, 3) & _ tmpAr(i - 1, 4) & _ tmpAr(i - 1, 5) & _ tmpAr(i - 1, 6) myGroupNumJP(k) = i - 1 myGroupNamEN(k) = RTrim(tmpAr(i, 1) & " " & _ Replace(tmpAr(i, 2), "*", "") & " " & _ Replace(tmpAr(i, 3), "*", "") & " " & _ Replace(tmpAr(i, 4), "*", "") & " " & _ Replace(tmpAr(i, 5), "*", "") & " " & _ Replace(tmpAr(i, 6), "*", "")) myGroupNumEN(k) = i Else k = k - 1 End If k = k + 1 Next j Next i ReDim GroupAr(UBound(myGroupNamJP), 3) For i = LBound(GroupAr) To UBound(GroupAr) GroupAr(i, 0) = myGroupNamJP(i) GroupAr(i, 1) = myGroupNumJP(i) GroupAr(i, 2) = myGroupNamEN(i) GroupAr(i, 3) = myGroupNumEN(i) Next i Erase myGroupNamJP Erase myGroupNumJP Erase myGroupNamEN Erase myGroupNumEN k = 0 For i = LBound(GroupAr) To UBound(GroupAr) ReDim Preserve myGroupNamJP(k) ReDim Preserve myGroupNumJP(k) ReDim Preserve myGroupNamEN(k) ReDim Preserve myGroupNumEN(k) myGroupNamJP(k) = GroupAr(i, 0) myGroupNumJP(k) = GroupAr(i, 1) myGroupNamEN(k) = GroupAr(i, 2) myGroupNumEN(k) = GroupAr(i, 3) k = k + 1 For j = LBound(Cancel_Ar) To UBound(Cancel_Ar) If CLng(Cancel_Ar(j, 1)) < CLng(GroupAr(i, 1)) And _ CLng(GroupAr(i, 1)) < CLng(Cancel_Ar(j, 2)) _ Then k = k - 1 End If Next j Next i ReDim GroupAr(UBound(myGroupNamJP), 3) With myRegExp1 .Pattern = endStrPtn .IgnoreCase = True .Global = True End With With myRegExp2 .Pattern = JapStrPtn .IgnoreCase = True .Global = True End With For i = LBound(GroupAr) To UBound(GroupAr) myGroupNamJP(i) = myRegExp1.Replace(myGroupNamJP(i), "") myGroupNamEN(i) = myRegExp1.Replace(myGroupNamEN(i), "") myGroupNamEN(i) = RTrim(myRegExp2.Replace(myGroupNamEN(i), "")) GroupAr(i, 0) = myGroupNamJP(i) GroupAr(i, 1) = myGroupNumJP(i) GroupAr(i, 2) = myGroupNamEN(i) GroupAr(i, 3) = myGroupNumEN(i) Next i ReDim Preserve myAr(UBound(myAr), 5) ReDim myAr2(UBound(myAr), 3) For i = LBound(myAr) To UBound(myAr) tmpStrJ = "" tmpStrE = "" myAr2(i, 0) = myAr(i, 0) For j = LBound(GroupAr) To UBound(GroupAr) If CLng(myAr(i, 1)) < CLng(GroupAr(j, 1)) And _ CLng(GroupAr(j, 3)) < CLng(myAr(i, 2)) Then tmpStrJ = tmpStrJ & GroupAr(j, 0) tmpStrE = RTrim(tmpStrE & " " & GroupAr(j, 2)) myAr(i, 4) = GroupAr(j, 1) End If Next j If tmpStrJ = "" Then myAr(i, 3) = myAr(i - 1, 3) myAr(i, 4) = myAr(i - 1, 4) myAr(i, 5) = myAr(i - 1, 5) myAr2(i, 1) = myAr(i - 1, 3) myAr2(i, 2) = myAr(i - 1, 5) Else myAr(i, 3) = tmpStrJ myAr(i, 5) = tmpStrE myAr2(i, 1) = tmpStrJ myAr2(i, 2) = tmpStrE End If Next i Set mySht = Worksheets.Add With mySht .Range("A1").Value = "Item_Number" .Range("B1").Value = "上位食品名(日)" .Range("C1").Value = "上位食品名(英)" .Range("A2:C450") = myAr2 End With Erase ItemNumAr Erase Cancel_Ar Erase GroupAr Erase myAr Erase myAr2 Set mySht = Nothing Set myRng = Nothing Set myRegExp1 = Nothing Set myRegExp2 = Nothing End Sub