comparison js/taggingtext.js @ 89:e681d693240e extractapp

new: generated regex to SmartRegex
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Wed, 03 Jun 2015 16:54:09 +0200
parents fb5049fc5dd7
children a05491461199
comparison
equal deleted inserted replaced
88:61593b047289 89:e681d693240e
576 }); 576 });
577 577
578 function genRegexWindowOpen(){ 578 function genRegexWindowOpen(){
579 var btn_state = $('#regex_generator').css('display'); 579 var btn_state = $('#regex_generator').css('display');
580 if (btn_state == "block") { 580 if (btn_state == "block") {
581 $("#regex_generator").css("display", "none"); 581 genRegexWindowClose();
582 $("#gen_regex_window_open_id").text("Open Gen Regex");
583 } else { 582 } else {
584 $('#regex_generator').css("display", "block"); 583 $('#regex_generator').css("display", "block");
585 $("#gen_regex_window_open_id").text("Close Gen Regex"); 584 $("#gen_regex_window_open_id").text("Close Gen Regex");
586 } 585 }
587 } 586 }
588 function genRegexWindowClose(){ 587 function genRegexWindowClose(){
589 $('#regex_generator').css("display", "none"); 588 $('#regex_generator').css("display", "none");
589 $("#gen_regex_window_open_id").text("Open Gen Regex");
590 } 590 }
591 591
592 function sharedStart_(array){ 592 function sharedStart_(array){
593 var A= array.concat().sort(), 593 var A= array.concat().sort(),
594 a1= A[0], a2= A[A.length-1], L= a1.length, i= 0; 594 a1= A[0], a2= A[A.length-1], L= a1.length, i= 0;
595 while(i<L && a1.charAt(i)=== a2.charAt(i)) i++; 595 while(i<L && a1.charAt(i)=== a2.charAt(i)) i++;
596 return a1.substring(0, i); 596 return a1.substring(0, i);
597 }
598
599 function getTagNameByTag(tag){
600 var name = "";
601 // taglistArray is a global variable
602 for (var i = 0; i < taglistArray.length; i++) {
603 var taglistValue = taglistArray[i];
604
605 var _tag = taglistValue[2];
606 var _name = taglistValue[1];
607 if (_tag == tag) {
608 name = _name;
609 }
610 }
611
612 return name;
613 }
614
615 function genRegexAddToSmartRegex() {
616 // append blocks of generated regex to smart regex
617
618 smartRegexEmpty(); // clear
619
620 var reg_obj = getSuggestedRegex();
621 console.log(reg_obj);
622
623 for (var i = 0; i < reg_obj.length; i++) {
624 if (reg_obj[i].txt != "") {
625
626 var newdiv = document.createElement("span");
627
628 $(newdiv).css("border", "1px solid black");
629 $(newdiv).css("width", "100px");
630
631 if (reg_obj[i].tag == null) {
632 if (reg_obj[i].txt == "○") {
633 $(newdiv).text("空白");
634 } else {
635 $(newdiv).text(reg_obj[i].txt);
636 }
637 $(newdiv).attr("class", "span_NOTAG");
638 $(newdiv).attr("regexReplace","NOTAG");
639 } else {
640
641 var name = getTagNameByTag(reg_obj[i].tag);
642 $(newdiv).text(name+"名");
643
644 $(newdiv).attr("class", "span_"+reg_obj[i].tag);
645 $(newdiv).attr("regexReplace", reg_obj[i].tag);
646
647 }
648
649 regex_element_index += 1;
650 $(newdiv).attr("id", "regex_elem_"+regex_element_index);
651
652 $(newdiv).attr("regexText", reg_obj[i].txt);
653
654 $('#smartRegexShowDiv').append(newdiv);
655
656 }
657 }
658
597 } 659 }
598 660
599 function longestCommonSubstring_(string1, string2){ 661 function longestCommonSubstring_(string1, string2){
600 // init max value 662 // init max value
601 var longestCommonSubstring = 0; 663 var longestCommonSubstring = 0;
656 } 718 }
657 return s1.substring(start_idx, (start_idx + max_len)); 719 return s1.substring(start_idx, (start_idx + max_len));
658 720
659 } 721 }
660 722
661 function getRegex(_pattern) { 723 var suggestedRegex = [];
724
725 function setSuggestedRegex(_pattern) {
726 /*
662 console.log(_pattern[0]); 727 console.log(_pattern[0]);
663 console.log(_pattern[1]); 728 console.log(_pattern[1]);
729 */
664 var p0 = _pattern[0]; 730 var p0 = _pattern[0];
665 var p1 = _pattern[1]; 731 var p1 = _pattern[1];
666 732
667 // TODO: find common pattern 733 // TODO: find common pattern
668 var reg_str = ""; 734 suggestedRegex = []; // it's a global variable
669 // _p1 = 測試 735
670 // _p2 = 測<tag_name>試</tag_name>一下
671 var combined = [];
672 if (p0.length > p1.length) { 736 if (p0.length > p1.length) {
673 combined = p0; 737 suggestedRegex = p0;
674 } else if(p0.length < p1.length) { 738 } else if(p0.length < p1.length) {
675 combined = p1; 739 suggestedRegex = p1;
676 } else { // equal length 740 } else { // equal length
677 // find matching string 741 // find matching string
678 var cnt = p0.length; 742 var cnt = p0.length;
679 for (var i = 0; i < cnt; i++) { 743 for (var i = 0; i < cnt; i++) {
680 if (p1[i].tag != null) { 744 if (p1[i].tag != null) {
681 combined.push({tag:p1[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p1[i].txt.length+"}"}); 745 suggestedRegex.push({tag:p1[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p1[i].txt.length+"}"});
682 } else if (p0[i].tag != null) { 746 } else if (p0[i].tag != null) {
683 combined.push({tag:p0[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p0[i].txt.length+"}"}); 747 suggestedRegex.push({tag:p0[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p0[i].txt.length+"}"});
684 } else { 748 } else {
685 // find matching for text in each corresponding position 749 // find matching for text in each corresponding position
686 var texts = [p0[i].txt, p1[i].txt]; 750 var texts = [p0[i].txt, p1[i].txt];
687 var common = longestCommonSubstring(p0[i].txt, p1[i].txt); 751 var common = longestCommonSubstring(p0[i].txt, p1[i].txt);
688 752
692 common[i]; 756 common[i];
693 reg_for_common += common[i]+"|"; 757 reg_for_common += common[i]+"|";
694 }; 758 };
695 reg_for_common += "]"; 759 reg_for_common += "]";
696 */ 760 */
697 combined.push({tag:null, txt:common}); 761 suggestedRegex.push({tag:null, txt:common});
698 } 762 }
699 }; 763 };
700 } 764 }
701 765
702 for (var i = 0; i < combined.length; i++) { 766
703 reg_str += combined[i].txt; 767 }
704 }; 768
705 769 function getSuggestedRegex(){
706 return reg_str; 770 return suggestedRegex;
707 } 771 }
708 772
709 var pattern_obj = []; // record pattern array for regex generator. only contain pattern1 and pattern2 773 var pattern_obj = []; // record pattern array for regex generator. only contain pattern1 and pattern2
710 774
711 function genRegexBySelection(tag_item_div, _selection) { 775 function genRegexBySelection(tag_item_div, _selection) {
712 var add_gen_regex_button = document.createElement("button"); 776 var add_gen_regex_button = document.createElement("button");
713 $(add_gen_regex_button).id = "addToGenRegex"; 777 $(add_gen_regex_button).id = "addToGenRegex";
714 $(add_gen_regex_button).addClass("btn btn-md"); 778 $(add_gen_regex_button).addClass("btn btn-md");
715 $(add_gen_regex_button).click( function(){ 779 $(add_gen_regex_button).click( function(){
716 // popup for selected words regex gen 780 // popup for selected words regex gen
781 /*
717 console.log("Debug: "); 782 console.log("Debug: ");
718 console.log(_selection); 783 console.log(_selection);
719 784 */
785
720 if (_selection.type == "Range") { 786 if (_selection.type == "Range") {
721 // select words, not just click on text 787 // select words, not just click on text
722 var anchor_node = _selection.anchorNode; 788 var anchor_node = _selection.anchorNode;
723 var focus_node = _selection.focusNode; 789 var focus_node = _selection.focusNode;
724 var sibling_node = anchor_node.nextElementSibling; 790 var sibling_node = anchor_node.nextElementSibling;
749 815
750 $(seleted_div).text(text_before+tagged_text+text_after); 816 $(seleted_div).text(text_before+tagged_text+text_after);
751 seleted_obj.push({tag:null, txt:text_before}); 817 seleted_obj.push({tag:null, txt:text_before});
752 seleted_obj.push({tag:tag_name, txt:tagged_text}); 818 seleted_obj.push({tag:tag_name, txt:tagged_text});
753 seleted_obj.push({tag:null, txt:text_after}); 819 seleted_obj.push({tag:null, txt:text_after});
754 820 /*
755
756 console.log(text_before); 821 console.log(text_before);
757 console.log(tag_name); 822 console.log(tag_name);
758 console.log(tagged_text); 823 console.log(tagged_text);
759 console.log(text_after); 824 console.log(text_after);
825 */
760 } 826 }
761 827
762 828
763 var generated_regex = ""; 829
830 var generated_regex_plaintext = "";
764 // show generate regex window 831 // show generate regex window
765 $('#regex_generator').css("display", "block"); 832 $('#regex_generator').css("display", "block");
766 $("#gen_regex_window_open_id").text("Close Gen Regex"); 833 $("#gen_regex_window_open_id").text("Close Gen Regex");
767 834
768 //var seleted_text = String(_selection).replace(/^\s+|\s+$/g,''); 835 //var seleted_text = String(_selection).replace(/^\s+|\s+$/g,'');
773 pattern_obj.push(seleted_obj); 840 pattern_obj.push(seleted_obj);
774 // pattern1.text(seleted_div.text()); 841 // pattern1.text(seleted_div.text());
775 } else if (pattern2.children().length == 0) { 842 } else if (pattern2.children().length == 0) {
776 pattern2.append(seleted_div); 843 pattern2.append(seleted_div);
777 pattern_obj.push(seleted_obj); 844 pattern_obj.push(seleted_obj);
778 //pattern2.text(seleted_div.text()); 845
779 generated_regex = getRegex(pattern_obj); 846 setSuggestedRegex(pattern_obj);
847 var generated_regex = getSuggestedRegex();
848
849 // get plaintext from generated_regex obj
850 for (var i = 0; i < generated_regex.length; i++) {
851 generated_regex_plaintext += generated_regex[i].txt;
852 }
780 853
781 } else { 854 } else {
782 // pattern1 and pattern2 are already having text 855 // pattern1 and pattern2 are already having text
783 pattern1.children().remove(); 856 pattern1.children().remove();
784 pattern1.append(pattern2.children()); 857 pattern1.append(pattern2.children());
786 pattern2.children().remove(); 859 pattern2.children().remove();
787 pattern2.append(seleted_div); 860 pattern2.append(seleted_div);
788 861
789 pattern_obj.shift(); 862 pattern_obj.shift();
790 pattern_obj.push(seleted_obj); 863 pattern_obj.push(seleted_obj);
791 864
792 //pattern1.text(pattern2.text()); 865 setSuggestedRegex(pattern_obj);
793 //pattern2.text(seleted_div); 866 var generated_regex = getSuggestedRegex();
794 generated_regex = getRegex(pattern_obj); 867
868 // get plaintext from generated_regex obj
869 for (var i = 0; i < generated_regex.length; i++) {
870 generated_regex_plaintext += generated_regex[i].txt;
871 }
872
795 873
796 } 874 }
797 $('#generated_regex').text(generated_regex); 875 //$('#generated_regex').text(generated_regex);
876 $('#generated_regex').text(generated_regex_plaintext);
798 // --- 877 // ---
799 878
800 $('#regex_generator_error_msg').text(""); 879 $('#regex_generator_error_msg').text("");
801 } else { 880 } else {
802 $('#regex_generator_error_msg').text("Note: Not a valid selection for regex generator."); 881 $('#regex_generator_error_msg').text("Note: Not a valid selection for regex generator.");